From 1f502bd9d7d2c1f98ad93a09ffe435e11a95aedd Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Wed, 18 Jun 2014 20:17:35 +0000 Subject: [PATCH 0001/1155] Replace Execution Engine's mutex with std::recursive_mutex. This change has a bit of a trickle down effect due to the fact that there are a number of derived implementations of ExecutionEngine, and that the mutex is not tightly encapsulated so is used by other classes directly. Reviewed by: rnk Differential Revision: http://reviews.llvm.org/D4196 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211214 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/ExecutionEngine/ExecutionEngine.h | 7 ++-- include/llvm/IR/ValueMap.h | 12 +++--- lib/ExecutionEngine/ExecutionEngine.cpp | 16 ++++---- lib/ExecutionEngine/JIT/JIT.cpp | 38 +++++++++---------- lib/ExecutionEngine/JIT/JITEmitter.cpp | 22 +++++------ lib/ExecutionEngine/MCJIT/MCJIT.cpp | 37 +++++++++--------- unittests/IR/ValueMapTest.cpp | 8 ++-- 7 files changed, 69 insertions(+), 71 deletions(-) diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index e5dab6191ab6..d349af810650 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -22,10 +22,10 @@ #include "llvm/IR/ValueMap.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Mutex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include +#include #include #include @@ -42,7 +42,6 @@ class JITEventListener; class JITMemoryManager; class MachineCodeInfo; class Module; -class MutexGuard; class ObjectCache; class RTDyldMemoryManager; class Triple; @@ -59,7 +58,7 @@ class ExecutionEngineState { public: struct AddressMapConfig : public ValueMapConfig { typedef ExecutionEngineState *ExtraData; - static sys::Mutex *getMutex(ExecutionEngineState *EES); + static std::recursive_mutex *getMutex(ExecutionEngineState 
*EES); static void onDelete(ExecutionEngineState *EES, const GlobalValue *Old); static void onRAUW(ExecutionEngineState *, const GlobalValue *, const GlobalValue *); @@ -164,7 +163,7 @@ class ExecutionEngine { /// lock - This lock protects the ExecutionEngine, MCJIT, JIT, JITResolver and /// JITEmitter classes. It must be held while changing the internal state of /// any of those classes. - sys::Mutex lock; + std::recursive_mutex lock; //===--------------------------------------------------------------------===// // ExecutionEngine Startup diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index f196f334b605..17b0fe01b116 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -28,9 +28,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Mutex.h" #include "llvm/Support/type_traits.h" #include +#include namespace llvm { @@ -45,7 +45,7 @@ class ValueMapConstIterator; /// This class defines the default behavior for configurable aspects of /// ValueMap<>. User Configs should inherit from this class to be as compatible /// as possible with future versions of ValueMap. -template +template struct ValueMapConfig { typedef MutexT mutex_type; @@ -216,11 +216,11 @@ class ValueMapCallbackVH : public CallbackVH { ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); if (M) - M->acquire(); + M->lock(); Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this. Copy.Map->Map.erase(Copy); // Definitely destroys *this. 
if (M) - M->release(); + M->unlock(); } void allUsesReplacedWith(Value *new_key) override { assert(isa(new_key) && @@ -229,7 +229,7 @@ class ValueMapCallbackVH : public CallbackVH { ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); if (M) - M->acquire(); + M->lock(); KeyT typed_new_key = cast(new_key); // Can destroy *this: @@ -245,7 +245,7 @@ class ValueMapCallbackVH : public CallbackVH { } } if (M) - M->release(); + M->unlock(); } }; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 9154fe2f5ff4..553ceb4575d5 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -166,7 +166,7 @@ void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) { } void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); DEBUG(dbgs() << "JIT: Map \'" << GV->getName() << "\' to [" << Addr << "]\n";); @@ -184,14 +184,14 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { } void ExecutionEngine::clearAllGlobalMappings() { - MutexGuard locked(lock); + std::lock_guard locked(lock); EEState.getGlobalAddressMap().clear(); EEState.getGlobalAddressReverseMap().clear(); } void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) EEState.RemoveMapping(FI); @@ -201,7 +201,7 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { } void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ExecutionEngineState::GlobalAddressMapTy &Map = EEState.getGlobalAddressMap(); @@ -228,7 +228,7 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { } void 
*ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ExecutionEngineState::GlobalAddressMapTy::iterator I = EEState.getGlobalAddressMap().find(GV); @@ -236,7 +236,7 @@ void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // If we haven't computed the reverse mapping yet, do so first. if (EEState.getGlobalAddressReverseMap().empty()) { @@ -555,7 +555,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { if (Function *F = const_cast(dyn_cast(GV))) return getPointerToFunction(F); - MutexGuard locked(lock); + std::lock_guard locked(lock); if (void *P = EEState.getGlobalAddressMap()[GV]) return P; @@ -1346,7 +1346,7 @@ ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE) : EE(EE), GlobalAddressMap(this) { } -sys::Mutex * +std::recursive_mutex * ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) { return &EES->EE.lock; } diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 83ec9784b98e..463fa299b3f3 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -96,18 +96,18 @@ namespace { /// bugpoint or gdb users to search for a function by name without any context. class JitPool { SmallPtrSet JITs; // Optimize for process containing just 1 JIT. 
- mutable sys::Mutex Lock; + mutable std::recursive_mutex Lock; public: void Add(JIT *jit) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); JITs.insert(jit); } void Remove(JIT *jit) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); JITs.erase(jit); } void *getPointerToNamedFunction(const char *Name) const { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); assert(JITs.size() != 0 && "No Jit registered"); //search function in every instance of JIT for (SmallPtrSet::const_iterator Jit = JITs.begin(), @@ -150,7 +150,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, AllJits->Add(this); // Add target data - MutexGuard locked(lock); + std::lock_guard locked(lock); FunctionPassManager &PM = jitstate->getPM(); M->setDataLayout(TM.getDataLayout()); PM.add(new DataLayoutPass(M)); @@ -177,7 +177,7 @@ JIT::~JIT() { /// addModule - Add a new Module to the JIT. If we previously removed the last /// Module, we need re-initialize jitstate with a valid Module. void JIT::addModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (Modules.empty()) { assert(!jitstate && "jitstate should be NULL if Modules vector is empty!"); @@ -206,7 +206,7 @@ void JIT::addModule(Module *M) { bool JIT::removeModule(Module *M) { bool result = ExecutionEngine::removeModule(M); - MutexGuard locked(lock); + std::lock_guard locked(lock); if (jitstate && jitstate->getModule() == M) { delete jitstate; @@ -408,13 +408,13 @@ GenericValue JIT::runFunction(Function *F, void JIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); EventListeners.push_back(L); } void JIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); std::vector::reverse_iterator I= std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { @@ -426,14 +426,14 @@ void JIT::NotifyFunctionEmitted( 
const Function &F, void *Code, size_t Size, const JITEvent_EmittedFunctionDetails &Details) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details); } } void JIT::NotifyFreeingMachineCode(void *OldPtr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFreeingMachineCode(OldPtr); } @@ -444,7 +444,7 @@ void JIT::NotifyFreeingMachineCode(void *OldPtr) { /// GlobalAddress[F] with the address of F's machine code. /// void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { - MutexGuard locked(lock); + std::lock_guard locked(lock); class MCIListener : public JITEventListener { MachineCodeInfo *const MCI; @@ -505,7 +505,7 @@ void *JIT::getPointerToFunction(Function *F) { if (void *Addr = getPointerToGlobalIfAvailable(F)) return Addr; // Check if function already code gen'd - MutexGuard locked(lock); + std::lock_guard locked(lock); // Now that this thread owns the lock, make sure we read in the function if it // exists in this Module. 
@@ -534,7 +534,7 @@ void *JIT::getPointerToFunction(Function *F) { } void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap().find(BB); @@ -546,7 +546,7 @@ void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { } void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { - MutexGuard locked(lock); + std::lock_guard locked(lock); getBasicBlockAddressMap().erase(BB); } @@ -555,7 +555,7 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) { (void)getPointerToFunction(BB->getParent()); // resolve basic block address - MutexGuard locked(lock); + std::lock_guard locked(lock); BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap().find(BB); @@ -592,7 +592,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { - MutexGuard locked(lock); + std::lock_guard locked(lock); void *Ptr = getPointerToGlobalIfAvailable(GV); if (Ptr) return Ptr; @@ -666,7 +666,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { size_t S = getDataLayout()->getTypeAllocSize(GlobalType); size_t A = getDataLayout()->getPreferredAlignment(GV); if (GV->isThreadLocal()) { - MutexGuard locked(lock); + std::lock_guard locked(lock); Ptr = TJI.allocateThreadLocalMemory(S); } else if (TJI.allocateSeparateGVMemory()) { if (A <= 8) { @@ -687,7 +687,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { } void JIT::addPendingFunction(Function *F) { - MutexGuard locked(lock); + std::lock_guard locked(lock); jitstate->getPendingFunctions().push_back(F); } diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 50b8c10b638b..1e1f5d55802c 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -40,7 +40,6 @@ 
#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Memory.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetJITInfo.h" @@ -50,6 +49,7 @@ #ifndef NDEBUG #include #endif +#include using namespace llvm; #define DEBUG_TYPE "jit" @@ -230,22 +230,22 @@ namespace { std::map Map; /// Guards Map from concurrent accesses. - mutable sys::Mutex Lock; + mutable std::recursive_mutex Lock; public: /// Registers a Stub to be resolved by Resolver. void RegisterStubResolver(void *Stub, JITResolver *Resolver) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); Map.insert(std::make_pair(Stub, Resolver)); } /// Unregisters the Stub when it's invalidated. void UnregisterStubResolver(void *Stub) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); Map.erase(Stub); } /// Returns the JITResolver instance that owns the Stub. JITResolver *getResolverFromStub(void *Stub) const { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. // This is the same trick as in LookupFunctionFromCallSite from @@ -258,7 +258,7 @@ namespace { /// True if any stubs refer to the given resolver. Only used in an assert(). /// O(N) bool ResolverHasStubs(JITResolver* Resolver) const { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); for (std::map::const_iterator I = Map.begin(), E = Map.end(); I != E; ++I) { if (I->second == Resolver) @@ -494,7 +494,7 @@ JITResolver::~JITResolver() { /// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub /// if it has already been created. void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { - MutexGuard locked(TheJIT->lock); + std::lock_guard guard(TheJIT->lock); // If we already have a stub for this function, recycle it. 
return state.getFunctionToLazyStubMap().lookup(F); @@ -503,7 +503,7 @@ void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { /// getFunctionStub - This returns a pointer to a function stub, creating /// one on demand as needed. void *JITResolver::getLazyFunctionStub(Function *F) { - MutexGuard locked(TheJIT->lock); + std::lock_guard guard(TheJIT->lock); // If we already have a lazy stub for this function, recycle it. void *&Stub = state.getFunctionToLazyStubMap()[F]; @@ -564,7 +564,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { /// getGlobalValueIndirectSym - Return a lazy pointer containing the specified /// GV address. void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { - MutexGuard locked(TheJIT->lock); + std::lock_guard guard(TheJIT->lock); // If we already have a stub for this global variable, recycle it. void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV]; @@ -622,7 +622,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { // Only lock for getting the Function. The call getPointerToFunction made // in this function might trigger function materializing, which requires // JIT lock to be unlocked. - MutexGuard locked(JR->TheJIT->lock); + std::lock_guard guard(JR->TheJIT->lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. @@ -654,7 +654,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { } // Reacquire the lock to update the GOT map. - MutexGuard locked(JR->TheJIT->lock); + std::lock_guard locked(JR->TheJIT->lock); // We might like to remove the call site from the CallSiteToFunction map, but // we can't do that! 
Multiple threads could be stuck, waiting to acquire the diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index e9ba96a6496f..f149edcb7b04 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -26,7 +26,6 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; @@ -66,7 +65,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, } MCJIT::~MCJIT() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // FIXME: We are managing our modules, so we do not want the base class // ExecutionEngine to manage them as well. To avoid double destruction // of the first (and only) module added in ExecutionEngine constructor @@ -102,12 +101,12 @@ MCJIT::~MCJIT() { } void MCJIT::addModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); OwnedModules.addModule(M); } bool MCJIT::removeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); return OwnedModules.removeModule(M); } @@ -129,12 +128,12 @@ void MCJIT::addArchive(object::Archive *A) { void MCJIT::setObjectCache(ObjectCache* NewCache) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ObjCache = NewCache; } ObjectBufferStream* MCJIT::emitObject(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added but not loaded to this // MCJIT instance, since these conditions are tested by our caller, @@ -174,7 +173,7 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) { void MCJIT::generateCodeForModule(Module *M) { // Get a thread lock to make sure we aren't trying to load multiple times - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. 
assert(OwnedModules.ownsModule(M) && @@ -214,7 +213,7 @@ void MCJIT::generateCodeForModule(Module *M) { } void MCJIT::finalizeLoadedModules() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Resolve any outstanding relocations. Dyld.resolveRelocations(); @@ -230,7 +229,7 @@ void MCJIT::finalizeLoadedModules() { // FIXME: Rename this. void MCJIT::finalizeObject() { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (ModulePtrSet::iterator I = OwnedModules.begin_added(), E = OwnedModules.end_added(); @@ -243,7 +242,7 @@ void MCJIT::finalizeObject() { } void MCJIT::finalizeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. assert(OwnedModules.ownsModule(M) && "MCJIT::finalizeModule: Unknown module."); @@ -268,7 +267,7 @@ uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { Module *MCJIT::findModuleForSymbol(const std::string &Name, bool CheckFunctionsOnly) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // If it hasn't already been generated, see if it's in one of our modules. for (ModulePtrSet::iterator I = OwnedModules.begin_added(), @@ -292,7 +291,7 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name, uint64_t MCJIT::getSymbolAddress(const std::string &Name, bool CheckFunctionsOnly) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // First, check to see if we already have this symbol. 
uint64_t Addr = getExistingSymbolAddress(Name); @@ -336,7 +335,7 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, } uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, false); if (Result != 0) finalizeLoadedModules(); @@ -344,7 +343,7 @@ uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { } uint64_t MCJIT::getFunctionAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, true); if (Result != 0) finalizeLoadedModules(); @@ -353,7 +352,7 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) { // Deprecated. Use getFunctionAddress instead. void *MCJIT::getPointerToFunction(Function *F) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); @@ -552,13 +551,13 @@ void *MCJIT::getPointerToNamedFunction(const std::string &Name, void MCJIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); EventListeners.push_back(L); } void MCJIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); SmallVector::reverse_iterator I= std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { @@ -567,14 +566,14 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) { } } void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) { - MutexGuard locked(lock); + std::lock_guard locked(lock); MemMgr.notifyObjectLoaded(this, &Obj); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyObjectEmitted(Obj); } } void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned I 
= 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFreeingObject(Obj); } diff --git a/unittests/IR/ValueMapTest.cpp b/unittests/IR/ValueMapTest.cpp index 248740aeb726..51acd2fc641a 100644 --- a/unittests/IR/ValueMapTest.cpp +++ b/unittests/IR/ValueMapTest.cpp @@ -186,19 +186,19 @@ struct LockMutex : ValueMapConfig { }; static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) { *Data.CalledRAUW = true; - EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked."; + EXPECT_FALSE(Data.M->try_lock()) << "Mutex should already be locked."; } static void onDelete(const ExtraData &Data, KeyT Old) { *Data.CalledDeleted = true; - EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked."; + EXPECT_FALSE(Data.M->try_lock()) << "Mutex should already be locked."; } static MutexT *getMutex(const ExtraData &Data) { return Data.M; } }; #if LLVM_ENABLE_THREADS TYPED_TEST(ValueMapTest, LocksMutex) { - sys::Mutex M(false); // Not recursive. + std::mutex M; // Not recursive. bool CalledRAUW = false, CalledDeleted = false; - typedef LockMutex ConfigType; + typedef LockMutex ConfigType; typename ConfigType::ExtraData Data = {&M, &CalledRAUW, &CalledDeleted}; ValueMap VM(Data); VM[this->BitcastV.get()] = 7; From 791ff5af548986d9ba11f6847421c4478a85f4c7 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 18 Jun 2014 20:57:28 +0000 Subject: [PATCH 0002/1155] MCAsmParser: full support for gas' '.if{cond} expression' directives Patch by Janne Grunau! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211218 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 49 +++++++++++++++++++---- test/MC/AsmParser/conditional_asm.s | 60 +++++++++++++++++++++++++++++ test/MC/AsmParser/if-diagnostics.s | 29 ++++++++++++++ 3 files changed, 130 insertions(+), 8 deletions(-) create mode 100644 test/MC/AsmParser/if-diagnostics.s diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index cbff7beccae0..932d0f3318ec 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -345,8 +345,9 @@ class AsmParser : public MCAsmParser { DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE, DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT, DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC, - DK_IF, DK_IFNE, DK_IFB, DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, - DK_IFNDEF, DK_IFNOTDEF, DK_ELSEIF, DK_ELSE, DK_ENDIF, + DK_IF, DK_IFEQ, DK_IFGE, DK_IFGT, DK_IFLE, DK_IFLT, DK_IFNE, DK_IFB, + DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF, + DK_ELSEIF, DK_ELSE, DK_ENDIF, DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS, DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA, DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER, @@ -433,8 +434,8 @@ class AsmParser : public MCAsmParser { bool parseDirectiveInclude(); // ".include" bool parseDirectiveIncbin(); // ".incbin" - // ".if" or ".ifne" - bool parseDirectiveIf(SMLoc DirectiveLoc); + // ".if", ".ifeq", ".ifge", ".ifgt" , ".ifle", ".iflt" or ".ifne" + bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind); // ".ifb" or ".ifnb", depending on ExpectBlank. bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank); // ".ifc" or ".ifnc", depending on ExpectEqual. 
@@ -1229,8 +1230,13 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) { default: break; case DK_IF: + case DK_IFEQ: + case DK_IFGE: + case DK_IFGT: + case DK_IFLE: + case DK_IFLT: case DK_IFNE: - return parseDirectiveIf(IDLoc); + return parseDirectiveIf(IDLoc, DirKind); case DK_IFB: return parseDirectiveIfb(IDLoc, true); case DK_IFNB: @@ -3792,9 +3798,8 @@ bool AsmParser::parseDirectiveIncbin() { } /// parseDirectiveIf -/// ::= .if expression -/// ::= .ifne expression -bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) { +/// ::= .if{,eq,ge,gt,le,lt,ne} expression +bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { @@ -3809,6 +3814,29 @@ bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) { Lex(); + switch (DirKind) { + default: + llvm_unreachable("unsupported directive"); + case DK_IF: + case DK_IFNE: + break; + case DK_IFEQ: + ExprValue = ExprValue == 0; + break; + case DK_IFGE: + ExprValue = ExprValue >= 0; + break; + case DK_IFGT: + ExprValue = ExprValue > 0; + break; + case DK_IFLE: + ExprValue = ExprValue <= 0; + break; + case DK_IFLT: + ExprValue = ExprValue < 0; + break; + } + TheCondState.CondMet = ExprValue; TheCondState.Ignore = !TheCondState.CondMet; } @@ -4111,6 +4139,11 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK; DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK; DirectiveKindMap[".if"] = DK_IF; + DirectiveKindMap[".ifeq"] = DK_IFEQ; + DirectiveKindMap[".ifge"] = DK_IFGE; + DirectiveKindMap[".ifgt"] = DK_IFGT; + DirectiveKindMap[".ifle"] = DK_IFLE; + DirectiveKindMap[".iflt"] = DK_IFLT; DirectiveKindMap[".ifne"] = DK_IFNE; DirectiveKindMap[".ifb"] = DK_IFB; DirectiveKindMap[".ifnb"] = DK_IFNB; diff --git a/test/MC/AsmParser/conditional_asm.s b/test/MC/AsmParser/conditional_asm.s index b9bee33c6a16..ecbceb1dc362 100644 --- 
a/test/MC/AsmParser/conditional_asm.s +++ b/test/MC/AsmParser/conditional_asm.s @@ -11,6 +11,66 @@ .endif .endif +# CHECK: .byte 0 +# CHECK-NOT: .byte 1 +.ifeq 32 - 32 + .byte 0 +.else + .byte 1 +.endif + +# CHECK: .byte 0 +# CHECK: .byte 1 +# CHECK-NOT: .byte 2 +.ifge 32 - 31 + .byte 0 +.endif +.ifge 32 - 32 + .byte 1 +.endif +.ifge 32 - 33 + .byte 2 +.endif + +# CHECK: .byte 0 +# CHECK-NOT: .byte 1 +# CHECK-NOT: .byte 2 +.ifgt 32 - 31 + .byte 0 +.endif +.ifgt 32 - 32 + .byte 1 +.endif +.ifgt 32 - 33 + .byte 2 +.endif + +# CHECK-NOT: .byte 0 +# CHECK: .byte 1 +# CHECK: .byte 2 +.ifle 32 - 31 + .byte 0 +.endif +.ifle 32 - 32 + .byte 1 +.endif +.ifle 32 - 33 + .byte 2 +.endif + +# CHECK-NOT: .byte 0 +# CHECK-NOT: .byte 1 +# CHECK: .byte 2 +.iflt 32 - 31 + .byte 0 +.endif +.iflt 32 - 32 + .byte 1 +.endif +.iflt 32 - 33 + .byte 2 +.endif + # CHECK: .byte 1 # CHECK-NOT: .byte 0 .ifne 32 - 32 diff --git a/test/MC/AsmParser/if-diagnostics.s b/test/MC/AsmParser/if-diagnostics.s new file mode 100644 index 000000000000..d102a5686d98 --- /dev/null +++ b/test/MC/AsmParser/if-diagnostics.s @@ -0,0 +1,29 @@ +// RUN: not llvm-mc -triple i386 %s -o /dev/null 2>&1 | FileCheck %s + +.if +.endif + +// CHECK: error: unknown token in expression +// CHECK: .if +// CHECK: ^ + +.ifeq 0, 3 +.endif + +// CHECK:error: unexpected token in '.if' directive +// CHECK: .ifeq 0, 3 +// CHECK: ^ + +.iflt "string1" +.endif + +// CHECK: error: expected absolute expression +// CHECK: .iflt "string1" +// CHECK: ^ + +.ifge test +.endif + +// CHECK: error: expected absolute expression +// CHECK: .ifge test +// CHECK: ^ From 889069443b66c07d698a6813e22d2498a30c749f Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 18 Jun 2014 20:57:32 +0000 Subject: [PATCH 0003/1155] MC: do not add comment string to the AsmToken in AsmLexer::LexLineComment Fixes macros with varargs if the macro instantiation has a trailing comment. Patch by Janne Grunau! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211219 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmLexer.cpp | 4 ++-- test/MC/AsmParser/vararg.s | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index bca516eca027..7991ef5fe09b 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -201,8 +201,8 @@ AsmToken AsmLexer::LexLineComment() { CurChar = getNextChar(); if (CurChar == EOF) - return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); - return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); + return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { diff --git a/test/MC/AsmParser/vararg.s b/test/MC/AsmParser/vararg.s index b27668ea3376..e3236b072d12 100644 --- a/test/MC/AsmParser/vararg.s +++ b/test/MC/AsmParser/vararg.s @@ -17,6 +17,12 @@ .endif .endm +.macro ifcc4 arg0, arg1:vararg +.if cc + movl \arg1, \arg0 +.endif +.endm + .text // CHECK: movl %esp, %ebp @@ -25,6 +31,8 @@ // CHECK: movl %ecx, %ebx // CHECK: movl %ecx, %eax // CHECK: movl %eax, %ecx +// CHECK: movl %ecx, %eax +// CHECK: movl %eax, %ecx .set cc,1 ifcc movl %esp, %ebp subl $0, %esp @@ -33,6 +41,8 @@ ifcc2 %ecx, %ebx ifcc3 %ecx %eax ifcc3 %eax, %ecx + ifcc4 %eax %ecx ## test + ifcc4 %ecx, %eax ## test // CHECK-NOT movl // CHECK: subl $1, %esp From d8fc64e0d1f09abb3265a6ff264629ca33a52c6a Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 21:08:17 +0000 Subject: [PATCH 0004/1155] Simplify code. We can delete the objects earlier now that we are copying the names to a buffer. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211221 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-ar/llvm-ar.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index 196240a0c2d4..920a4701b273 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -692,7 +692,6 @@ static void writeSymbolTable( std::string NameBuf; raw_string_ostream NameOS(NameBuf); unsigned NumSyms = 0; - std::vector DeleteIt; LLVMContext &Context = getGlobalContext(); for (ArrayRef::iterator I = Members.begin(), E = Members.end(); @@ -703,9 +702,8 @@ static void writeSymbolTable( MemberBuffer, false, sys::fs::file_magic::unknown, &Context); if (!ObjOrErr) continue; // FIXME: check only for "not an object file" errors. - object::SymbolicFile *Obj = ObjOrErr.get(); + std::unique_ptr Obj(ObjOrErr.get()); - DeleteIt.push_back(Obj); if (!StartOffset) { printMemberHeader(Out, "", sys::TimeValue::now(), 0, 0, 0, 0); StartOffset = Out.tell(); @@ -731,13 +729,6 @@ static void writeSymbolTable( } Out << NameOS.str(); - for (std::vector::iterator I = DeleteIt.begin(), - E = DeleteIt.end(); - I != E; ++I) { - object::SymbolicFile *O = *I; - delete O; - } - if (StartOffset == 0) return; From 6c59006684aa23429900d175e0f0261c7cd594e4 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 21:14:57 +0000 Subject: [PATCH 0005/1155] Add a symbols() range and use a range loop. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211222 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Object/SymbolicFile.h | 4 ++++ tools/llvm-ar/llvm-ar.cpp | 8 +++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h index 113373694556..40015ec9f6db 100644 --- a/include/llvm/Object/SymbolicFile.h +++ b/include/llvm/Object/SymbolicFile.h @@ -136,6 +136,10 @@ class SymbolicFile : public Binary { basic_symbol_iterator symbol_end() const { return symbol_end_impl(); } + typedef iterator_range basic_symbol_iterator_range; + basic_symbol_iterator_range symbols() const { + return basic_symbol_iterator_range(symbol_begin(), symbol_end()); + } // construction aux. static ErrorOr createIRObjectFile(MemoryBuffer *Object, diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index 920a4701b273..80a977c30bc8 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -710,17 +710,15 @@ static void writeSymbolTable( print32BE(Out, 0); } - for (object::basic_symbol_iterator I = Obj->symbol_begin(), - E = Obj->symbol_end(); - I != E; ++I) { - uint32_t Symflags = I->getFlags(); + for (const object::BasicSymbolRef &S : Obj->symbols()) { + uint32_t Symflags = S.getFlags(); if (Symflags & object::SymbolRef::SF_FormatSpecific) continue; if (!(Symflags & object::SymbolRef::SF_Global)) continue; if (Symflags & object::SymbolRef::SF_Undefined) continue; - failIfError(I->printName(NameOS)); + failIfError(S.printName(NameOS)); NameOS << '\0'; ++NumSyms; MemberOffsetRefs.push_back(std::make_pair(Out.tell(), MemberNum)); From 311ea125062983e1949bc1efda1dfa8e853ee70d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 21:40:43 +0000 Subject: [PATCH 0006/1155] Use LL suffix for literal that should be 64-bits. 
This hopefully fixes Windows git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211225 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index dc39bee6511e..45f10839040a 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1640,7 +1640,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); - const SDValue FractMask = DAG.getConstant((1L << FractBits) - 1, MVT::i64); + const SDValue FractMask = DAG.getConstant((1LL << FractBits) - 1, MVT::i64); SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); From f286d63757ae5d16f8211227a435e6ad4faa2c35 Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Wed, 18 Jun 2014 22:00:29 +0000 Subject: [PATCH 0007/1155] R600/SI: add gather4 and getlod intrinsics (v3) This contains all the previous patches + getlod support on top of it. It doesn't use SDNodes anymore, so it's quite small. It also adds v16i8 to SReg_128, which is used for the sampler descriptor. 
Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211228 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 47 +++ lib/Target/R600/SIInstructions.td | 102 ++++-- lib/Target/R600/SIIntrinsics.td | 50 +++ lib/Target/R600/SIRegisterInfo.td | 2 +- test/CodeGen/R600/llvm.SI.gather4.ll | 508 +++++++++++++++++++++++++++ test/CodeGen/R600/llvm.SI.getlod.ll | 44 +++ 6 files changed, 727 insertions(+), 26 deletions(-) create mode 100644 test/CodeGen/R600/llvm.SI.gather4.ll create mode 100644 test/CodeGen/R600/llvm.SI.getlod.ll diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index eb9746779374..c4994a250a8e 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -712,6 +712,53 @@ multiclass MIMG_Sampler op, string asm> { defm _V4 : MIMG_Sampler_Src_Helper; } +class MIMG_Gather_Helper op, string asm, + RegisterClass dst_rc, + RegisterClass src_rc> : MIMG < + op, + (outs dst_rc:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, + SReg_256:$srsrc, SReg_128:$ssamp), + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," + #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", + []> { + let mayLoad = 1; + let mayStore = 0; + + // DMASK was repurposed for GATHER4. 4 components are always + // returned and DMASK works like a swizzle - it selects + // the component to fetch. The only useful DMASK values are + // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns + // (red,red,red,red) etc.) The ISA document doesn't mention + // this. 
+ // Therefore, disable all code which updates DMASK by setting these two: + let MIMG = 0; + let hasPostISelHook = 0; +} + +multiclass MIMG_Gather_Src_Helper op, string asm, + RegisterClass dst_rc, + int channels> { + def _V1 : MIMG_Gather_Helper , + MIMG_Mask; + def _V2 : MIMG_Gather_Helper , + MIMG_Mask; + def _V4 : MIMG_Gather_Helper , + MIMG_Mask; + def _V8 : MIMG_Gather_Helper , + MIMG_Mask; + def _V16 : MIMG_Gather_Helper , + MIMG_Mask; +} + +multiclass MIMG_Gather op, string asm> { + defm _V1 : MIMG_Gather_Src_Helper; + defm _V2 : MIMG_Gather_Src_Helper; + defm _V3 : MIMG_Gather_Src_Helper; + defm _V4 : MIMG_Gather_Src_Helper; +} + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 428e49c6431c..60eb8f978180 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -987,31 +987,31 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>; //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>; //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>; -//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>; -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>; -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>; -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>; -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>; -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>; -//def IMAGE_GATHER4_C_CL : 
MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>; -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>; -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>; -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>; -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>; -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>; -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>; -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>; -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>; -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>; -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>; -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>; -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>; -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>; -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>; -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; -//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; -//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; +defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">; +defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">; +defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">; +defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">; +defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">; +defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">; +defm IMAGE_GATHER4_C : MIMG_Gather 
<0x00000048, "IMAGE_GATHER4_C">; +defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">; +defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">; +defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">; +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">; +defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">; +defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">; +defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">; +defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">; +defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">; +defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">; +defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">; +defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">; +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">; +defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">; +defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">; +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">; +defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">; +defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">; //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; //def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>; //def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>; @@ -1773,6 +1773,58 @@ def : SextInReg ; /********** Image sampling patterns **********/ /********** ======================= **********/ +class SampleRawPattern : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, i32:$dmask, i32:$unorm, + i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), + (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da), 
+ (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc), + $addr, $rsrc, $sampler) +>; + +// Only the variants which make sense are defined. +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + /* SIsample for simple 1D texture lookup */ def : Pat < (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 00e32c03a99e..df690a4f4b87 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -56,11 +56,61 @@ let TargetPrefix = "SI", isTarget = 1 in { class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; + // Fully-flexible SAMPLE instruction. 
+ class SampleRaw : Intrinsic < + [llvm_v4f32_ty], // vdata(VGPR) + [llvm_anyint_ty, // vaddr(VGPR) + llvm_v32i8_ty, // rsrc(SGPR) + llvm_v16i8_ty, // sampler(SGPR) + llvm_i32_ty, // dmask(imm) + llvm_i32_ty, // unorm(imm) + llvm_i32_ty, // r128(imm) + llvm_i32_ty, // da(imm) + llvm_i32_ty, // glc(imm) + llvm_i32_ty, // slc(imm) + llvm_i32_ty, // tfe(imm) + llvm_i32_ty], // lwe(imm) + [IntrNoMem]>; + def int_SI_sample : Sample; def int_SI_sampleb : Sample; def int_SI_sampled : Sample; def int_SI_samplel : Sample; + // Basic gather4 + def int_SI_gather4 : SampleRaw; + def int_SI_gather4_cl : SampleRaw; + def int_SI_gather4_l : SampleRaw; + def int_SI_gather4_b : SampleRaw; + def int_SI_gather4_b_cl : SampleRaw; + def int_SI_gather4_lz : SampleRaw; + + // Gather4 with comparison + def int_SI_gather4_c : SampleRaw; + def int_SI_gather4_c_cl : SampleRaw; + def int_SI_gather4_c_l : SampleRaw; + def int_SI_gather4_c_b : SampleRaw; + def int_SI_gather4_c_b_cl : SampleRaw; + def int_SI_gather4_c_lz : SampleRaw; + + // Gather4 with offsets + def int_SI_gather4_o : SampleRaw; + def int_SI_gather4_cl_o : SampleRaw; + def int_SI_gather4_l_o : SampleRaw; + def int_SI_gather4_b_o : SampleRaw; + def int_SI_gather4_b_cl_o : SampleRaw; + def int_SI_gather4_lz_o : SampleRaw; + + // Gather4 with comparison and offsets + def int_SI_gather4_c_o : SampleRaw; + def int_SI_gather4_c_cl_o : SampleRaw; + def int_SI_gather4_c_l_o : SampleRaw; + def int_SI_gather4_c_b_o : SampleRaw; + def int_SI_gather4_c_b_cl_o : SampleRaw; + def int_SI_gather4_c_lz_o : SampleRaw; + + def int_SI_getlod : SampleRaw; + def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index f1f01deaf361..8974b6300625 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ 
b/lib/Target/R600/SIRegisterInfo.td @@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64Regs, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>; diff --git a/test/CodeGen/R600/llvm.SI.gather4.ll b/test/CodeGen/R600/llvm.SI.gather4.ll new file mode 100644 index 000000000000..8402faaa4dca --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.gather4.ll @@ -0,0 +1,508 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: @gather4_v2 +;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4 +;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, 
float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl +;CHECK: IMAGE_GATHER4_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l +;CHECK: IMAGE_GATHER4_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b +;CHECK: IMAGE_GATHER4_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float 
%r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl +;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl_v8 +;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz_v2 +;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call 
void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz +;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_o +;CHECK: IMAGE_GATHER4_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl_o +;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = 
extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl_o_v8 +;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l_o +;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l_o_v8 +;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> 
%r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_o +;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_o_v8 +;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl_o +;CHECK: IMAGE_GATHER4_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = 
extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz_o +;CHECK: IMAGE_GATHER4_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_c +;CHECK: IMAGE_GATHER4_C {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl +;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, 
i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl_v8 +;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l +;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l_v8 +;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 
x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b +;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_v8 +;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_cl +;CHECK: IMAGE_GATHER4_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_cl() #0 { 
+main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz +;CHECK: IMAGE_GATHER4_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_c_o +;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_o_v8 +;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, 
{{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl_o +;CHECK: IMAGE_GATHER4_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l_o +;CHECK: IMAGE_GATHER4_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_o +;CHECK: IMAGE_GATHER4_C_B_O 
{{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_cl_o +;CHECK: IMAGE_GATHER4_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz_o +;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, 
float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz_o_v8 +;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> 
@llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> 
@llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.SI.getlod.ll b/test/CodeGen/R600/llvm.SI.getlod.ll new file mode 100644 index 000000000000..a7a17ec3fffa --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.getlod.ll @@ -0,0 +1,44 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: @getlod +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, 
i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: @getlod_v2 +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: @getlod_v4 +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod_v4() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + + +declare <4 x float> @llvm.SI.getlod.i32(i32, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } From ce09bda96ee00a1024952faf85e3fa01e0d6bc72 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 22:03:45 +0000 Subject: [PATCH 0008/1155] R600: Handle fnearbyint The 
difference from rint isn't really relevant here, so treat them as equivalent. OpenCL doesn't have nearbyint, so this is sort of pointless other than for completeness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211229 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 12 ++++++ lib/Target/R600/AMDGPUISelLowering.h | 1 + test/CodeGen/R600/fnearbyint.ll | 57 ++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 test/CodeGen/R600/fnearbyint.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 45f10839040a..fd24af062474 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -325,6 +325,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); @@ -334,6 +335,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FCOPYSIGN, VT, Expand); } + setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); + setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT_CC); @@ -501,6 +505,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::FCEIL: return LowerFCEIL(Op, DAG); case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); case ISD::FRINT: return LowerFRINT(Op, DAG); + case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG); case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); @@ -1683,6 +1688,13 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); } +SDValue 
AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const { + // FNEARBYINT and FRINT are the same, except in their handling of FP + // exceptions. Those aren't really meaningful for us, and OpenCL only has + // rint, so just treat them as equivalent. + return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); +} + SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 5be3070f589b..b2bb2579dcb3 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -54,6 +54,7 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll new file mode 100644 index 000000000000..1c1d7315189f --- /dev/null +++ b/test/CodeGen/R600/fnearbyint.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s + +; This should have the exactly the same output as the test for rint, +; so no need to check anything. 
+ +declare float @llvm.nearbyint.f32(float) #0 +declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #0 +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #0 +declare double @llvm.nearbyint.f64(double) #0 +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0 +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0 + + +define void @fnearbyint_f32(float addrspace(1)* %out, float %in) #1 { +entry: + %0 = call float @llvm.nearbyint.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +define void @fnearbyint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 { +entry: + %0 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +define void @fnearbyint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 { +entry: + %0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +define void @nearbyint_f64(double addrspace(1)* %out, double %in) { +entry: + %0 = call double @llvm.nearbyint.f64(double %in) + store double %0, double addrspace(1)* %out + ret void +} +define void @nearbyint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { +entry: + %0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %in) + store <2 x double> %0, <2 x double> addrspace(1)* %out + ret void +} + +define void @nearbyint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { +entry: + %0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %in) + store <4 x double> %0, <4 x double> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind } From fb2b9fb894b6c9be8a290bdf69c29f0f5511ba3c Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Wed, 18 Jun 2014 22:04:40 +0000 Subject: [PATCH 0009/1155] Teach llvm-size to know about Mach-O universal files (aka fat files) and fat files containing archives. 
Also fix a bug in MachOUniversalBinary::ObjectForArch::ObjectForArch() where it needed a >= when comparing the Index with the number of objects in a fat file. As the index starts at 0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211230 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOUniversal.cpp | 2 +- test/Object/size-trivial-macho.test | 12 +++++++ tools/llvm-size/llvm-size.cpp | 52 +++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index e414de8bcf17..05729ef7467e 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -53,7 +53,7 @@ static T getUniversalBinaryStruct(const char *Ptr) { MachOUniversalBinary::ObjectForArch::ObjectForArch( const MachOUniversalBinary *Parent, uint32_t Index) : Parent(Parent), Index(Index) { - if (!Parent || Index > Parent->getNumberOfObjects()) { + if (!Parent || Index >= Parent->getNumberOfObjects()) { clear(); } else { // Parse object header. 
diff --git a/test/Object/size-trivial-macho.test b/test/Object/size-trivial-macho.test index 960fb25fbecb..1ed611b0de22 100644 --- a/test/Object/size-trivial-macho.test +++ b/test/Object/size-trivial-macho.test @@ -10,6 +10,10 @@ RUN: llvm-size -format darwin %p/Inputs/macho-archive-x86_64.a \ RUN: | FileCheck %s -check-prefix mAR RUN: llvm-size -format darwin -x -l %p/Inputs/hello-world.macho-x86_64 \ RUN: | FileCheck %s -check-prefix mxl +RUN: llvm-size %p/Inputs/macho-universal.x86_64.i386 \ +RUN: | FileCheck %s -check-prefix u +RUN: llvm-size %p/Inputs/macho-universal-archive.x86_64.i386 \ +RUN: | FileCheck %s -check-prefix uAR A: section size addr A: __text 12 0 @@ -65,3 +69,11 @@ mxl: Section __la_symbol_ptr: 0x8 (addr 0x100001010 offset 4112) mxl: total 0x18 mxl: Segment __LINKEDIT: 0x1000 (vmaddr 0x100002000 fileoff 8192) mxl: total 0x100003000 + +u: __TEXT __DATA __OBJC others dec hex +u: 4096 0 0 4294971392 4294975488 100002000 {{.*}}/macho-universal.x86_64.i386:x86_64 +u: 4096 0 0 8192 12288 3000 {{.*}}/macho-universal.x86_64.i386:i386 + +uAR: __TEXT __DATA __OBJC others dec hex +uAR: 136 0 0 32 168 a8 {{.*}}/macho-universal-archive.x86_64.i386:x86_64(hello.o) +uAR: 5 4 0 0 9 9 {{.*}}/macho-universal-archive.x86_64.i386:i386(foo.o) diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp index 5e4e4aa932bb..b3aaac8d27dd 100644 --- a/tools/llvm-size/llvm-size.cpp +++ b/tools/llvm-size/llvm-size.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/MachO.h" +#include "llvm/Object/MachOUniversal.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -456,6 +457,57 @@ static void PrintFileSectionSizes(StringRef file) { } } } + } else if (MachOUniversalBinary *UB = + dyn_cast(binary.get())) { + // This is a Mach-O universal binary. Iterate over each object and + // display its sizes. 
+ bool moreThanOneArch = UB->getNumberOfObjects() > 1; + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + std::unique_ptr UO; + std::unique_ptr UA; + if (!I->getAsObjectFile(UO)) { + if (ObjectFile *o = dyn_cast(&*UO.get())) { + if (OutputFormat == sysv) + outs() << o->getFileName() << " :\n"; + PrintObjectSectionSizes(o); + if (OutputFormat == berkeley) { + MachOObjectFile *MachO = dyn_cast(o); + if (!MachO || moreThanOneFile || moreThanOneArch) + outs() << o->getFileName(); + outs() << "\n"; + } + } + } + else if (!I->getAsArchive(UA)) { + // This is an archive. Iterate over each member and display its sizes. + for (object::Archive::child_iterator i = UA->child_begin(), + e = UA->child_end(); i != e; ++i) { + ErrorOr> ChildOrErr = i->getAsBinary(); + if (std::error_code EC = ChildOrErr.getError()) { + errs() << ToolName << ": " << file << ": " << EC.message() << ".\n"; + continue; + } + if (ObjectFile *o = dyn_cast(&*ChildOrErr.get())) { + MachOObjectFile *MachO = dyn_cast(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " (ex " << UA->getFileName() + << "):\n"; + else if(MachO && OutputFormat == darwin) + outs() << UA->getFileName() << "(" << o->getFileName() << "):\n"; + PrintObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (MachO) + outs() << UA->getFileName() << "(" << o->getFileName() << ")\n"; + else + outs() << o->getFileName() << " (ex " << UA->getFileName() + << ")\n"; + } + } + } + } + } } else if (ObjectFile *o = dyn_cast(binary.get())) { if (OutputFormat == sysv) outs() << o->getFileName() << " :\n"; From 2ea6d93c5ea813994852e35f01314ccbcec02bce Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 22:11:03 +0000 Subject: [PATCH 0010/1155] Use stdint macros for specifying size of constants git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211231 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 5 +++-- 1 file 
changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index fd24af062474..34c2b2bf61d0 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1636,7 +1636,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(1023, MVT::i32)); // Extract the sign bit. - const SDValue SignBitMask = DAG.getConstant(1ul << 31, MVT::i32); + const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32); SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask); // Extend back to to 64-bits. @@ -1645,7 +1645,8 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); - const SDValue FractMask = DAG.getConstant((1LL << FractBits) - 1, MVT::i64); + const SDValue FractMask + = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64); SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); From 887a5c7f5da73ada835985d1f9552ff1716517a7 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 18 Jun 2014 22:48:09 +0000 Subject: [PATCH 0011/1155] Move ARMJITInfo off of the TargetMachine and down onto the subtarget. This required untangling a mess of headers that included around. This a recommit of r210953 with a fix for the removed accessor for JITInfo. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211233 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMCodeEmitter.cpp | 1 + lib/Target/ARM/ARMJITInfo.cpp | 8 ++++++++ lib/Target/ARM/ARMJITInfo.h | 8 +------- lib/Target/ARM/ARMMachineFunctionInfo.cpp | 10 ++++++++++ lib/Target/ARM/ARMMachineFunctionInfo.h | 11 +---------- lib/Target/ARM/ARMSubtarget.cpp | 2 +- lib/Target/ARM/ARMSubtarget.h | 3 +++ lib/Target/ARM/ARMTargetMachine.cpp | 2 +- lib/Target/ARM/ARMTargetMachine.h | 7 ++----- 9 files changed, 28 insertions(+), 24 deletions(-) diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 2fd7eddd8748..5fb6ebfeaae1 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -15,6 +15,7 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 8821c2dd09f8..6d1114d51aab 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -13,6 +13,7 @@ #include "ARMJITInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/JITCodeEmitter.h" @@ -334,3 +335,10 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, } } } + +void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) { + const ARMFunctionInfo *AFI = MF.getInfo(); + ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); + JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); + IsPIC = isPIC; +} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index ee4c863543e7..27e2a2013404 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -14,7 +14,6 @@ #ifndef ARMJITINFO_H #define ARMJITINFO_H -#include 
"ARMMachineFunctionInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -103,12 +102,7 @@ namespace llvm { /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize /// jump table ids to jump table bases map; remember if codegen relocation /// model is PIC. - void Initialize(const MachineFunction &MF, bool isPIC) { - const ARMFunctionInfo *AFI = MF.getInfo(); - ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); - JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); - IsPIC = isPIC; - } + void Initialize(const MachineFunction &MF, bool isPIC); /// getConstantPoolEntryAddr - The ARM target puts all constant /// pool entries into constant islands. This returns the address of the diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp index af445e2f35aa..892b269fc181 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -12,3 +12,13 @@ using namespace llvm; void ARMFunctionInfo::anchor() { } + +ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) + : isThumb(MF.getTarget().getSubtarget().isThumb()), + hasThumb2(MF.getTarget().getSubtarget().hasThumb2()), + StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), + RestoreSPFromFP(false), LRSpilledForFarJump(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0), + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), + GlobalBaseReg(0) {} diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 7934761af25b..44a9e3495b90 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -130,16 +130,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} - explicit 
ARMFunctionInfo(MachineFunction &MF) : - isThumb(MF.getTarget().getSubtarget().isThumb()), - hasThumb2(MF.getTarget().getSubtarget().hasThumb2()), - StByValParamsPadding(0), - ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), - LRSpilledForFarJump(false), - FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} + explicit ARMFunctionInfo(MachineFunction &MF); bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index fc842512ef00..875f1e83f875 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -148,7 +148,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL) {} + TSInfo(DL), JITInfo() {} void ARMSubtarget::initializeEnvironment() { HasV4TOps = false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 7da80ec0d494..ae62a6f876fd 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -14,6 +14,7 @@ #ifndef ARMSUBTARGET_H #define ARMSUBTARGET_H +#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" @@ -256,10 +257,12 @@ class ARMSubtarget : public ARMGenSubtargetInfo { const DataLayout *getDataLayout() const { return &DL; } const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + ARMJITInfo *getJITInfo() { return &JITInfo; } private: const DataLayout DL; ARMSelectionDAGInfo TSInfo; + ARMJITInfo JITInfo; void initializeEnvironment(); 
void resetSubtargetFeatures(StringRef CPU, StringRef FS); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index a93824230d70..7a3836ae62b4 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -52,7 +52,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle, Options), JITInfo() { + Subtarget(TT, CPU, FS, isLittle, Options) { // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 154927786082..737c2fae1a80 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -32,10 +32,6 @@ namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { protected: ARMSubtarget Subtarget; - -private: - ARMJITInfo JITInfo; - public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -44,7 +40,6 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { CodeGenOpt::Level OL, bool isLittle); - ARMJITInfo *getJITInfo() override { return &JITInfo; } const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } const ARMTargetLowering *getTargetLowering() const override { // Implemented by derived classes @@ -56,6 +51,8 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { const DataLayout *getDataLayout() const override { return getSubtargetImpl()->getDataLayout(); } + ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } + /// \brief Register ARM analysis passes with a pass manager. 
void addAnalysisPasses(PassManagerBase &PM) override; From 1260b844fd71720566eabd4cc5738f2abe747871 Mon Sep 17 00:00:00 2001 From: Nikola Smiljanic Date: Thu, 19 Jun 2014 00:26:49 +0000 Subject: [PATCH 0012/1155] PR10140 - StringPool's PooledStringPtr has non-const operator== causing bad OR-result. Mark conversion operator explicit and const qualify comparison operators. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211244 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/StringPool.h | 6 +++--- unittests/Support/CMakeLists.txt | 1 + unittests/Support/StringPool.cpp | 31 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 unittests/Support/StringPool.cpp diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h index 7e1394cb2335..7306ce2240b2 100644 --- a/include/llvm/Support/StringPool.h +++ b/include/llvm/Support/StringPool.h @@ -128,10 +128,10 @@ namespace llvm { } inline const char *operator*() const { return begin(); } - inline operator bool() const { return S != nullptr; } + inline explicit operator bool() const { return S != nullptr; } - inline bool operator==(const PooledStringPtr &That) { return S == That.S; } - inline bool operator!=(const PooledStringPtr &That) { return S != That.S; } + inline bool operator==(const PooledStringPtr &That) const { return S == That.S; } + inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; } }; } // End llvm namespace diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt index 0ea931033339..c50acdfb8e64 100644 --- a/unittests/Support/CMakeLists.txt +++ b/unittests/Support/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_unittest(SupportTests ProgramTest.cpp RegexTest.cpp SourceMgrTest.cpp + StringPool.cpp SwapByteOrderTest.cpp ThreadLocalTest.cpp TimeValueTest.cpp diff --git a/unittests/Support/StringPool.cpp b/unittests/Support/StringPool.cpp new file mode 100644 index 
000000000000..7b7805f91710 --- /dev/null +++ b/unittests/Support/StringPool.cpp @@ -0,0 +1,31 @@ +//===- llvm/unittest/Support/ThreadLocalTest.cpp - Therad Local tests ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/StringPool.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(PooledStringPtrTest, OperatorEquals) { + StringPool pool; + const PooledStringPtr a = pool.intern("a"); + const PooledStringPtr b = pool.intern("b"); + EXPECT_FALSE(a == b); +} + +TEST(PooledStringPtrTest, OperatorNotEquals) { + StringPool pool; + const PooledStringPtr a = pool.intern("a"); + const PooledStringPtr b = pool.intern("b"); + EXPECT_TRUE(a != b); +} + +} From dd8406a6b7f0cbf9082c4bdb2cc8c3b3d5da6eec Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 01:09:49 +0000 Subject: [PATCH 0013/1155] Fix breakage from r211244 by using LLVM_EXPLICIT to avoid using explicit operators under MSVC where they're not supported. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211246 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/StringPool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h index 7306ce2240b2..3e0465340c3b 100644 --- a/include/llvm/Support/StringPool.h +++ b/include/llvm/Support/StringPool.h @@ -29,6 +29,7 @@ #ifndef LLVM_SUPPORT_STRINGPOOL_H #define LLVM_SUPPORT_STRINGPOOL_H +#include "llvm/Support/Compiler.h" #include "llvm/ADT/StringMap.h" #include #include @@ -128,7 +129,7 @@ namespace llvm { } inline const char *operator*() const { return begin(); } - inline explicit operator bool() const { return S != nullptr; } + inline LLVM_EXPLICIT operator bool() const { return S != nullptr; } inline bool operator==(const PooledStringPtr &That) const { return S == That.S; } inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; } From d9b35435b89015d154b0e20f4d4796d936237f84 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 19 Jun 2014 01:19:19 +0000 Subject: [PATCH 0014/1155] R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211247 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsR600.td | 36 ++++++++++++ lib/Target/R600/AMDGPUISelLowering.cpp | 32 +++++++++- lib/Target/R600/AMDGPUISelLowering.h | 12 ++++ lib/Target/R600/AMDGPUInstrInfo.td | 29 ++++++++++ lib/Target/R600/AMDGPUInstructions.td | 10 ++++ lib/Target/R600/AMDGPUIntrinsics.td | 2 - lib/Target/R600/R600Instructions.td | 2 +- lib/Target/R600/SIInsertWaits.cpp | 2 + lib/Target/R600/SIInstructions.td | 37 +++++++++--- .../InstCombine/InstCombineCalls.cpp | 14 +++++ test/CodeGen/R600/big_alu.ll | 12 ++-- test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll | 27 +++++++++ test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll | 27 +++++++++ test/CodeGen/R600/llvm.AMDGPU.div_scale.ll | 23 ++++++++ test/CodeGen/R600/llvm.AMDGPU.rcp.ll | 58 +++++++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll | 29 ++++++++++ test/CodeGen/R600/pv.ll | 4 +- test/CodeGen/R600/sgpr-copy.ll | 4 +- test/CodeGen/R600/si-sgpr-spill.ll | 18 +++--- .../Transforms/InstCombine/r600-intrinsics.ll | 47 +++++++++++++++ 20 files changed, 393 insertions(+), 32 deletions(-) create mode 100644 test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.div_scale.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.rcp.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll create mode 100644 test/Transforms/InstCombine/r600-intrinsics.ll diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td index ecb5668d8e95..09607d5834d6 100644 --- a/include/llvm/IR/IntrinsicsR600.td +++ b/include/llvm/IR/IntrinsicsR600.td @@ -33,4 +33,40 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tgid">; defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tidig">; + } // End TargetPrefix = "r600" + +let TargetPrefix = "AMDGPU" 
in { +def int_AMDGPU_div_scale : + Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_div_scale">; + +def int_AMDGPU_div_fmas : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_div_fmas">; + +def int_AMDGPU_div_fixup : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_div_fixup">; + +def int_AMDGPU_trig_preop : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_trig_preop">; + +def int_AMDGPU_rcp : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_rcp">; + +def int_AMDGPU_rsq : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_rsq">; + + +} // End TargetPrefix = "AMDGPU" diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 34c2b2bf61d0..1aa92fadbeea 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -842,6 +842,28 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::AMDGPU_div_scale: + return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::AMDGPU_div_fmas: + return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::AMDGPU_div_fixup: + return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::AMDGPU_trig_preop: + return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::AMDGPU_rcp: + return 
DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1)); + + case Intrinsic::AMDGPU_rsq: + return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_imax: return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -2042,6 +2064,14 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FMIN) NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) + NODE_NAME_CASE(URECIP) + NODE_NAME_CASE(DIV_SCALE) + NODE_NAME_CASE(DIV_FMAS) + NODE_NAME_CASE(DIV_FIXUP) + NODE_NAME_CASE(TRIG_PREOP) + NODE_NAME_CASE(RCP) + NODE_NAME_CASE(RSQ) + NODE_NAME_CASE(DOT4) NODE_NAME_CASE(BFE_U32) NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) @@ -2051,8 +2081,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) NODE_NAME_CASE(MAD_I24) - NODE_NAME_CASE(URECIP) - NODE_NAME_CASE(DOT4) NODE_NAME_CASE(EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index b2bb2579dcb3..e2000a04ba4e 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -175,6 +175,9 @@ enum { DWORDADDR, FRACT, CLAMP, + + // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. + // Denormals handled on some parts. COS_HW, SIN_HW, FMAX, @@ -184,6 +187,15 @@ enum { SMIN, UMIN, URECIP, + DIV_SCALE, + DIV_FMAS, + DIV_FIXUP, + TRIG_PREOP, // 1 ULP max error for f64 + + // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. + // For f64, max error 2^29 ULP, handles denormals. + RCP, + RSQ, DOT4, BFE_U32, // Extract range of bits with zero extension to 32-bits. BFE_I32, // Extract range of bits with sign extension to 32-bits. 
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 942a9e8ff351..d0ee40a67872 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -19,6 +19,14 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; +def AMDGPUTrigPreOp : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>] +>; + +def AMDGPUDivScaleOp : SDTypeProfile<2, 3, + [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>] +>; + //===----------------------------------------------------------------------===// // AMDGPU DAG Nodes // @@ -29,6 +37,12 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; // out = a - floor(a) def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; +// out = 1.0 / a +def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>; + +// out = 1.0 / sqrt(a) +def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; + // out = max(a, b) a and b are floats def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, [SDNPCommutative, SDNPAssociative] @@ -78,6 +92,21 @@ def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3", // e is rounding error def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; +// Special case divide preop and flags. +def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>; + +// Special case divide FMA with scale and flags (src0 = Quotient, +// src1 = Denominator, src2 = Numerator). +def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>; + +// Single or double precision division fixup. +// Special case divide fixup and flags(src0 = Quotient, src1 = +// Denominator, src2 = Numerator). 
+def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>; + +// Look Up 2.0 / pi src0 with segment select src1[4:0] +def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>; + def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD", SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>, [SDNPHasChain, SDNPMayLoad]>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 8bfc11cd468c..14bfd8cc18d5 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -519,6 +519,16 @@ multiclass Expand24UBitOps { >; } +class RcpPat : Pat < + (fdiv FP_ONE, vt:$src), + (RcpInst $src) +>; + +class RsqPat : Pat < + (AMDGPUrcp (fsqrt vt:$src)), + (RsqInst $src) +>; + include "R600Instructions.td" include "R700Instructions.td" include "EvergreenInstructions.td" diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 6dc7612d46fb..538b4cd8af28 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -30,8 +30,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 
58c704d8ec85..47b7da0955d8 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1083,7 +1083,7 @@ class RECIP_UINT_Common inst> : R600_1OP_Helper < } class RECIPSQRT_CLAMPED_Common inst> : R600_1OP_Helper < - inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq + inst, "RECIPSQRT_CLAMPED", AMDGPUrsq > { let Itinerary = TransALU; } diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index a17fed7e7ea7..173332674ff1 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -341,6 +341,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { return Result; } +// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States" +// around other non-memory instructions. bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { bool Changes = false; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 60eb8f978180..26024dc4f2df 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1116,22 +1116,23 @@ defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", [(set f32:$dst, (flog2 f32:$src0))] >; + defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", - [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] + [(set f32:$dst, (AMDGPUrcp f32:$src0))] >; defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", - [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] + [(set f32:$dst, (AMDGPUrsq f32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", [(set f32:$dst, (fdiv FP_ONE, (fsqrt f32:$src0)))] >; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", - [(set 
f64:$dst, (fdiv FP_ONE, f64:$src0))] + [(set f64:$dst, (AMDGPUrcp f64:$src0))] >; defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", @@ -1417,8 +1418,12 @@ defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; //def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; ////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; -defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; -def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; +defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", + [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))] +>; +def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", + [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))] +>; def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64", [(set i64:$dst, (shl i64:$src0, i32:$src1))] @@ -1452,12 +1457,19 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; -defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; -def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; + +defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", + [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))] +>; +def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", + [(set f64:$dst, (AMDGPUdiv_fmas f64:$src0, f64:$src1, f64:$src2))] +>; //def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; -def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; +def V_TRIG_PREOP_F64 : VOP3_64_32 <0x00000174, "V_TRIG_PREOP_F64", + [(set f64:$dst, (AMDGPUtrig_preop f64:$src0, i32:$src1))] +>; 
//===----------------------------------------------------------------------===// // Pseudo Instructions @@ -1748,6 +1760,15 @@ def : Pat < (S_BARRIER) >; +//===----------------------------------------------------------------------===// +// VOP1 Patterns +//===----------------------------------------------------------------------===// + +def : RcpPat; +def : RcpPat; +def : RsqPat; +def : RsqPat; + //===----------------------------------------------------------------------===// // VOP2 Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index d4bdd75fa82a..ff7456415814 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -922,6 +922,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::AMDGPU_rcp: { + if (const ConstantFP *C = dyn_cast(II->getArgOperand(0))) { + const APFloat &ArgVal = C->getValueAPF(); + APFloat Val(ArgVal.getSemantics(), 1.0); + APFloat::opStatus Status = Val.divide(ArgVal, + APFloat::rmNearestTiesToEven); + // Only do this if it was exact and therefore not dependent on the + // rounding mode. + if (Status == APFloat::opOK) + return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val)); + } + + break; + } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. 
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll index 6b683769fe06..511e8ef62951 100644 --- a/test/CodeGen/R600/big_alu.ll +++ b/test/CodeGen/R600/big_alu.ll @@ -101,7 +101,7 @@ IF137: ; preds = %main_body %88 = insertelement <4 x float> %87, float %32, i32 2 %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 %90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89) - %91 = call float @llvm.AMDGPU.rsq(float %90) + %91 = call float @llvm.AMDGPU.rsq.f32(float %90) %92 = fmul float %30, %91 %93 = fmul float %31, %91 %94 = fmul float %32, %91 @@ -344,7 +344,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %325 = insertelement <4 x float> %324, float %318, i32 2 %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3 %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326) - %328 = call float @llvm.AMDGPU.rsq(float %327) + %328 = call float @llvm.AMDGPU.rsq.f32(float %327) %329 = fmul float %314, %328 %330 = fmul float %316, %328 %331 = fmul float %318, %328 @@ -377,7 +377,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %358 = insertelement <4 x float> %357, float %45, i32 2 %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3 %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359) - %361 = call float @llvm.AMDGPU.rsq(float %360) + %361 = call float @llvm.AMDGPU.rsq.f32(float %360) %362 = fmul float %45, %361 %363 = call float @fabs(float %362) %364 = fmul float %176, 0x3FECCCCCC0000000 @@ -403,7 +403,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %384 = insertelement <4 x float> %383, float %45, i32 2 %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385) - %387 = call float @llvm.AMDGPU.rsq(float %386) + %387 = call float @llvm.AMDGPU.rsq.f32(float %386) %388 = fmul float %45, %387 %389 = call float @fabs(float %388) %390 = fmul float %176, 0x3FF51EB860000000 @@ -1041,7 +1041,7 @@ IF179: ; preds = 
%ENDIF175 %896 = insertelement <4 x float> %895, float %45, i32 2 %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3 %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897) - %899 = call float @llvm.AMDGPU.rsq(float %898) + %899 = call float @llvm.AMDGPU.rsq.f32(float %898) %900 = fmul float %45, %899 %901 = call float @fabs(float %900) %902 = fmul float %176, 0x3FECCCCCC0000000 @@ -1150,7 +1150,7 @@ ENDIF178: ; preds = %ENDIF175, %IF179 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #1 +declare float @llvm.AMDGPU.rsq.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1 diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll new file mode 100644 index 000000000000..c8c73573e073 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone +declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone + +; SI-LABEL: @test_div_fixup_f32: +; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]] +; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_div_fixup_f64: +; SI: 
V_DIV_FIXUP_F64 +define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { + %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll new file mode 100644 index 000000000000..4f1e827c2cbd --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.fmas.f32(float, float, float) nounwind readnone +declare double @llvm.AMDGPU.div.fmas.f64(double, double, double) nounwind readnone + +; SI-LABEL: @test_div_fmas_f32: +; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]] +; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_div_fmas_f64: +; SI: V_DIV_FMAS_F64 +define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { + %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll new file mode 100644 index 000000000000..1bcbe2f859ed --- 
/dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll @@ -0,0 +1,23 @@ +; XFAIL: * +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.scale.f32(float, float) nounwind readnone +declare double @llvm.AMDGPU.div.scale.f64(double, double) nounwind readnone + +; SI-LABEL @test_div_scale_f32: +define void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind { + %a = load float addrspace(1)* %aptr, align 4 + %b = load float addrspace(1)* %bptr, align 4 + %result = call float @llvm.AMDGPU.div.scale.f32(float %a, float %b) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL @test_div_scale_f64: +define void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load double addrspace(1)* %bptr, align 8 + %result = call double @llvm.AMDGPU.div.scale.f64(double %a, double %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll new file mode 100644 index 000000000000..ca5260dc5bc8 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone +declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone + + +declare float @llvm.sqrt.f32(float) nounwind readnone +declare double @llvm.sqrt.f64(double) nounwind readnone + +; FUNC-LABEL: @rcp_f32 +; SI: V_RCP_F32_e32 +define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind { + %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; 
FUNC-LABEL: @rcp_f64 +; SI: V_RCP_F64_e32 +define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind { + %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @rcp_pat_f32 +; SI: V_RCP_F32_e32 +define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { + %rcp = fdiv float 1.0, %src + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rcp_pat_f64 +; SI: V_RCP_F64_e32 +define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { + %rcp = fdiv double 1.0, %src + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @rsq_rcp_pat_f32 +; SI: V_RSQ_F32_e32 +define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { + %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone + %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rsq_rcp_pat_f64 +; SI: V_RSQ_F64_e32 +define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { + %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone + %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll new file mode 100644 index 000000000000..1c736d447ea9 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone + +; SI-LABEL: @test_trig_preop_f64: +; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]] +; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]], +; SI: V_TRIG_PREOP_F64 
[[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]] +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load i32 addrspace(1)* %bptr, align 4 + %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: @test_trig_preop_f64_imm_segment: +; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]], +; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7 +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index f322bc71c6bd..55eb56d3fb1d 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -103,7 +103,7 @@ main_body: %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95) %97 = call float @fabs(float %96) - %98 = call float @llvm.AMDGPU.rsq(float %97) + %98 = call float @llvm.AMDGPU.rsq.f32(float %97) %99 = fmul float %4, %98 %100 = fmul float %5, %98 %101 = fmul float %6, %98 @@ -225,7 +225,7 @@ declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare float @fabs(float) #2 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #1 +declare float @llvm.AMDGPU.rsq.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #1 diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index c581d86b99bd..c7d5bf90644e 100644 --- 
a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -70,7 +70,7 @@ main_body: %55 = fadd float %54, %53 %56 = fmul float %45, %45 %57 = fadd float %55, %56 - %58 = call float @llvm.AMDGPU.rsq(float %57) + %58 = call float @llvm.AMDGPU.rsq.f32(float %57) %59 = fmul float %43, %58 %60 = fmul float %44, %58 %61 = fmul float %45, %58 @@ -212,7 +212,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #3 +declare float @llvm.AMDGPU.rsq.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll index b34a757d9b65..53a096513bbc 100644 --- a/test/CodeGen/R600/si-sgpr-spill.ll +++ b/test/CodeGen/R600/si-sgpr-spill.ll @@ -203,7 +203,7 @@ main_body: %198 = fadd float %197, %196 %199 = fmul float %97, %97 %200 = fadd float %198, %199 - %201 = call float @llvm.AMDGPU.rsq(float %200) + %201 = call float @llvm.AMDGPU.rsq.f32(float %200) %202 = fmul float %95, %201 %203 = fmul float %96, %201 %204 = fmul float %202, %29 @@ -384,7 +384,7 @@ IF67: ; preds = %LOOP65 %355 = fadd float %354, %353 %356 = fmul float %352, %352 %357 = fadd float %355, %356 - %358 = call float @llvm.AMDGPU.rsq(float %357) + %358 = call float @llvm.AMDGPU.rsq.f32(float %357) %359 = fmul float %350, %358 %360 = fmul float %351, %358 %361 = fmul float %352, %358 @@ -512,7 +512,7 @@ IF67: ; preds = %LOOP65 %483 = fadd float %482, %481 %484 = fmul float %109, %109 %485 = fadd float %483, %484 - %486 = call float @llvm.AMDGPU.rsq(float %485) + %486 = call float @llvm.AMDGPU.rsq.f32(float %485) %487 = fmul float %107, %486 %488 = fmul float %108, %486 %489 = fmul float %109, %486 @@ -541,7 +541,7 @@ IF67: ; preds = %LOOP65 %512 = fadd float %511, %510 %513 = fmul float %97, %97 %514 = fadd float %512, %513 - %515 = call float 
@llvm.AMDGPU.rsq(float %514) + %515 = call float @llvm.AMDGPU.rsq.f32(float %514) %516 = fmul float %95, %515 %517 = fmul float %96, %515 %518 = fmul float %97, %515 @@ -658,7 +658,7 @@ declare i32 @llvm.SI.tid() #2 declare float @ceil(float) #3 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #2 +declare float @llvm.AMDGPU.rsq.f32(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1 @@ -887,7 +887,7 @@ main_body: %212 = fadd float %211, %210 %213 = fmul float %209, %209 %214 = fadd float %212, %213 - %215 = call float @llvm.AMDGPU.rsq(float %214) + %215 = call float @llvm.AMDGPU.rsq.f32(float %214) %216 = fmul float %205, %215 %217 = fmul float %207, %215 %218 = fmul float %209, %215 @@ -1123,7 +1123,7 @@ IF189: ; preds = %LOOP %434 = fsub float -0.000000e+00, %433 %435 = fadd float 0x3FF00068E0000000, %434 %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00) - %437 = call float @llvm.AMDGPU.rsq(float %436) + %437 = call float @llvm.AMDGPU.rsq.f32(float %436) %438 = fmul float %437, %436 %439 = fsub float -0.000000e+00, %436 %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00) @@ -1147,7 +1147,7 @@ IF189: ; preds = %LOOP %458 = fadd float %457, %456 %459 = fmul float %455, %455 %460 = fadd float %458, %459 - %461 = call float @llvm.AMDGPU.rsq(float %460) + %461 = call float @llvm.AMDGPU.rsq.f32(float %460) %462 = fmul float %451, %461 %463 = fmul float %453, %461 %464 = fmul float %455, %461 @@ -1257,7 +1257,7 @@ ENDIF197: ; preds = %IF189, %IF198 %559 = fadd float %558, %557 %560 = fmul float %556, %556 %561 = fadd float %559, %560 - %562 = call float @llvm.AMDGPU.rsq(float %561) + %562 = call float @llvm.AMDGPU.rsq.f32(float %561) %563 = fmul float %562, %561 %564 = fsub float -0.000000e+00, %561 %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00) diff --git 
a/test/Transforms/InstCombine/r600-intrinsics.ll b/test/Transforms/InstCombine/r600-intrinsics.ll new file mode 100644 index 000000000000..1db6b0d28bf5 --- /dev/null +++ b/test/Transforms/InstCombine/r600-intrinsics.ll @@ -0,0 +1,47 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone +declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone + +; CHECK-LABEL: @test_constant_fold_rcp_f32_1 +; CHECK-NEXT: ret float 1.000000e+00 +define float @test_constant_fold_rcp_f32_1() nounwind { + %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_1 +; CHECK-NEXT: ret double 1.000000e+00 +define double @test_constant_fold_rcp_f64_1() nounwind { + %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f32_half +; CHECK-NEXT: ret float 2.000000e+00 +define float @test_constant_fold_rcp_f32_half() nounwind { + %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_half +; CHECK-NEXT: ret double 2.000000e+00 +define double @test_constant_fold_rcp_f64_half() nounwind { + %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f32_43 +; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) +define float @test_constant_fold_rcp_f32_43() nounwind { + %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_43 +; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) +define double @test_constant_fold_rcp_f64_43() nounwind { + %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone + ret double %val +} + From f9ec8fe70c5084a15372cc0b126218f699794e24 Mon Sep 17 00:00:00 2001 From: 
David Majnemer Date: Thu, 19 Jun 2014 01:25:43 +0000 Subject: [PATCH 0015/1155] MS asm: Properly handle quoted symbol names We would get confused by '@' characters in symbol names, we would mistake the text following them for the variant kind. When an identifier is a string, the variant kind will never show up inside of it. Instead, check to see if there is a variant following the string. This fixes PR19965. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211249 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 14 +++++++++++++- lib/Target/X86/AsmParser/X86AsmParser.cpp | 6 ++++-- test/MC/X86/intel-syntax.s | 8 ++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 932d0f3318ec..1ba02d181dbb 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -812,7 +812,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { // Parse symbol variant std::pair Split; if (!MAI.useParensForSymbolVariant()) { - Split = Identifier.split('@'); + if (FirstTokenKind == AsmToken::String) { + if (Lexer.is(AsmToken::At)) { + Lexer.Lex(); // eat @ + SMLoc AtLoc = getLexer().getLoc(); + StringRef VName; + if (parseIdentifier(VName)) + return Error(AtLoc, "expected symbol variant after '@'"); + + Split = std::make_pair(Identifier, VName); + } + } else { + Split = Identifier.split('@'); + } } else if (Lexer.is(AsmToken::LParen)) { Lexer.Lex(); // eat ( StringRef VName; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index b30eeeb0e037..3e57914f9e94 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1061,7 +1061,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) break; - switch (getLexer().getKind()) { + AsmToken::TokenKind TK = 
getLexer().getKind(); + switch (TK) { default: { if (SM.isValidEndState()) { Done = true; @@ -1073,13 +1074,14 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { Done = true; break; } + case AsmToken::String: case AsmToken::Identifier: { // This could be a register or a symbolic displacement. unsigned TmpReg; const MCExpr *Val; SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); - if(!ParseRegister(TmpReg, IdentLoc, End)) { + if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) { SM.onRegister(TmpReg); UpdateLocLex = false; break; diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 540282a74c73..796891880b12 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -599,3 +599,11 @@ fxrstor64 opaque ptr [rax] // CHECK: movq _g0+8, %rcx mov rbx, qword ptr [_g0] mov rcx, qword ptr [_g0 + 8] + +"?half@?0??bar@@YAXXZ@4NA": + .quad 4602678819172646912 + +fadd "?half@?0??bar@@YAXXZ@4NA" +fadd "?half@?0??bar@@YAXXZ@4NA"@IMGREL +// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA" +// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA"@IMGREL32 From bdb4aca20211b9ed31874c1be0d814fe26f10a24 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 19 Jun 2014 03:28:28 +0000 Subject: [PATCH 0016/1155] Remove redundant code in InstCombineShift, no functionality change because instsimplify already does this and instcombine calls instsimplify a few lines above. Patch by Suyog Sarda! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211250 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineShifts.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index cc6665c947d7..2495747da5fe 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -789,11 +789,6 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { // have a sign-extend idiom. Value *X; if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) { - // If the left shift is just shifting out partial signbits, delete the - // extension. - if (cast(Op0)->hasNoSignedWrap()) - return ReplaceInstUsesWith(I, X); - // If the input is an extension from the shifted amount value, e.g. // %x = zext i8 %A to i32 // %y = shl i32 %x, 24 From 650b6ea893c118bf0c6e40cd3002954cc34df2bb Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 19 Jun 2014 03:35:49 +0000 Subject: [PATCH 0017/1155] Make instsimplify's analysis of icmp eq/ne use computeKnownBits to determine whether the icmp is always true or false. Patch by Suyog Sarda! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211251 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 19 +++++++++++++++++++ test/Transforms/InstCombine/align-2d-gep.ll | 2 +- test/Transforms/InstSimplify/compare.ll | 19 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 3684fda854fc..b06d9948ac19 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2241,6 +2241,25 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // If a bit is known to be zero for A and known to be one for B, + // then A and B cannot be equal. 
+ if (ICmpInst::isEquality(Pred)) { + if (ConstantInt *CI = dyn_cast(RHS)) { + uint32_t BitWidth = CI->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); + if (((LHSKnownOne & RHSKnownZero) != 0) || + ((LHSKnownZero & RHSKnownOne) != 0)) + return (Pred == ICmpInst::ICMP_EQ) + ? ConstantInt::getFalse(CI->getContext()) + : ConstantInt::getTrue(CI->getContext()); + } + } + // Special logic for binary operators. BinaryOperator *LBO = dyn_cast(LHS); BinaryOperator *RBO = dyn_cast(RHS); diff --git a/test/Transforms/InstCombine/align-2d-gep.ll b/test/Transforms/InstCombine/align-2d-gep.ll index 5bca46d5a21d..f6a877684ce4 100644 --- a/test/Transforms/InstCombine/align-2d-gep.ll +++ b/test/Transforms/InstCombine/align-2d-gep.ll @@ -31,7 +31,7 @@ bb1: store <2 x double>, <2 x double>* %r, align 8 %indvar.next = add i64 %j, 2 - %exitcond = icmp eq i64 %indvar.next, 557 + %exitcond = icmp eq i64 %indvar.next, 556 br i1 %exitcond, label %bb11, label %bb1 bb11: diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll index 105e244ed8cf..89fd6362d78d 100644 --- a/test/Transforms/InstSimplify/compare.ll +++ b/test/Transforms/InstSimplify/compare.ll @@ -883,3 +883,22 @@ define i1 @returns_nonnull() { ; CHECK: ret i1 false } +; If a bit is known to be zero for A and known to be one for B, +; then A and B cannot be equal. 
+define i1 @icmp_eq_const(i32 %a) nounwind { + %b = mul nsw i32 %a, -2 + %c = icmp eq i32 %b, 1 + ret i1 %c + +; CHECK-LABEL: @icmp_eq_const +; CHECK-NEXT: ret i1 false +} + +define i1 @icmp_ne_const(i32 %a) nounwind { + %b = mul nsw i32 %a, -2 + %c = icmp ne i32 %b, 1 + ret i1 %c + +; CHECK-LABEL: @icmp_ne_const +; CHECK-NEXT: ret i1 true +} From fe3a219355b58428b93461c6c0517a9b90ebe962 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 19 Jun 2014 03:51:46 +0000 Subject: [PATCH 0018/1155] Move optimization of some cases of (A & C1)|(B & C2) from instcombine to instsimplify. Patch by Rahul Jain, plus some last minute changes by me -- you can blame me for any bugs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211252 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 32 ++++++++++++++++ .../InstCombine/InstCombineAndOrXor.cpp | 23 ------------ test/Transforms/InstSimplify/apint-or.ll | 37 +++++++++++++++++++ 3 files changed, 69 insertions(+), 23 deletions(-) create mode 100644 test/Transforms/InstSimplify/apint-or.ll diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index b06d9948ac19..8aa6e5a190bf 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1625,6 +1625,38 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; + // (A & C)|(B & D) + Value *C = nullptr, *D = nullptr; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + ConstantInt *C1 = dyn_cast(C); + ConstantInt *C2 = dyn_cast(D); + if (C1 && C2 && (C1->getValue() == ~C2->getValue())) { + // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. + Value *V1, *V2; + if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. 
+ if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return A; + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return A; + } + // Or commutes, try both ways. + if ((C1->getValue() & (C1->getValue() + 1)) == 0 && + match(B, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + return B; + if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + return B; + } + } + } + // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4f5d65ab785f..b23a606e0889 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1996,29 +1996,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { C1 = dyn_cast(C); C2 = dyn_cast(D); if (C1 && C2) { // (A & C1)|(B & C2) - // If we have: ((V + N) & C1) | (V & C2) - // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 - // replace with V+N. - if (C1->getValue() == ~C2->getValue()) { - if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) - return ReplaceInstUsesWith(I, A); - if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) - return ReplaceInstUsesWith(I, A); - } - // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue()+1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. 
- if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) - return ReplaceInstUsesWith(I, B); - if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) - return ReplaceInstUsesWith(I, B); - } - } - if ((C1->getValue() & C2->getValue()) == 0) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) // iff (C1&C2) == 0 and (N&~C1) == 0 diff --git a/test/Transforms/InstSimplify/apint-or.ll b/test/Transforms/InstSimplify/apint-or.ll new file mode 100644 index 000000000000..5d314db7133d --- /dev/null +++ b/test/Transforms/InstSimplify/apint-or.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -instsimplify -S | not grep or + +; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0. +define i39 @test1(i39 %V, i39 %M) { + ;; If we have: ((V + N) & C1) | (V & C2) + ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + ;; replace with V+N. + %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943 + %N = and i39 %M, 274877906944 + %A = add i39 %V, %N + %B = and i39 %A, %C1 + %D = and i39 %V, 274877906943 + %R = or i39 %B, %D + ret i39 %R +; CHECK-LABEL @test1 +; CHECK-NEXT: and {{.*}}, -274877906944 +; CHECK-NEXT: add +; CHECK-NEXT: ret +} + +; Test the case where Integer BitWidth > 64 && BitWidth <= 1024. +define i399 @test2(i399 %V, i399 %M) { + ;; If we have: ((V + N) & C1) | (V & C2) + ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + ;; replace with V+N. + %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943 + %N = and i399 %M, 18446742974197923840 + %A = add i399 %V, %N + %B = and i399 %A, %C1 + %D = and i399 %V, 274877906943 + %R = or i399 %B, %D + ret i399 %R +; CHECK-LABEL @test2 +; CHECK-NEXT: and {{.*}}, 18446742974197923840 +; CHECK-NEXT: add +; CHECK-NEXT: ret +} From 64429cefba00547d37b50383536a62047fd93380 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 19 Jun 2014 04:24:43 +0000 Subject: [PATCH 0019/1155] R600: Add a few tests I forgot to add. 
These belong with r210827 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211253 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/R600/llvm.AMDGPU.abs.ll | 48 ++++++++++++++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.clamp.ll | 28 +++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.fract.ll | 27 +++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 test/CodeGen/R600/llvm.AMDGPU.abs.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.clamp.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.fract.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll new file mode 100644 index 000000000000..a0a47b7c4701 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone + +; Legacy name +declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone + +; FUNC-LABEL: @s_abs_i32 +; SI: S_SUB_I32 +; SI: S_MAX_I32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind { + %abs = call i32 @llvm.AMDGPU.abs(i32 %src) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_abs_i32 +; SI: V_SUB_I32_e32 +; SI: V_MAX_I32_e32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { + %val = load i32 addrspace(1)* %src, align 4 + %abs = call i32 @llvm.AMDGPU.abs(i32 %val) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @abs_i32_legacy_amdil +; SI: V_SUB_I32_e32 +; SI: V_MAX_I32_e32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @abs_i32_legacy_amdil(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind 
{ + %val = load i32 addrspace(1)* %src, align 4 + %abs = call i32 @llvm.AMDIL.abs.i32(i32 %val) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll new file mode 100644 index 000000000000..d608953a0dd2 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone +declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone + +; FUNC-LABEL: @clamp_0_1_f32 +; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0 +; SI: BUFFER_STORE_DWORD [[RESULT]] +; SI: S_ENDPGM + +; EG: MOV_SAT +define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind { + %clamp = call float @llvm.AMDGPU.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @clamp_0_1_amdil_legacy_f32 +; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0 +; SI: BUFFER_STORE_DWORD [[RESULT]] +define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind { + %clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll new file mode 100644 index 000000000000..72ec1c57571e --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: 
llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone + +; Legacy name +declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone + +; FUNC-LABEL: @fract_f32 +; SI: V_FRACT_F32 +; EG: FRACT +define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind { + %val = load float addrspace(1)* %src, align 4 + %fract = call float @llvm.AMDGPU.fract.f32(float %val) nounwind readnone + store float %fract, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @fract_f32_legacy_amdil +; SI: V_FRACT_F32 +; EG: FRACT +define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind { + %val = load float addrspace(1)* %src, align 4 + %fract = call float @llvm.AMDIL.fraction.f32(float %val) nounwind readnone + store float %fract, float addrspace(1)* %out, align 4 + ret void +} From bd01df2487e451fcdac23875f2a101b8e93fbe6e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 19 Jun 2014 06:10:58 +0000 Subject: [PATCH 0020/1155] Convert some assert(0) to llvm_unreachable or fold an 'if' condition into the assert. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211254 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/StreamableMemoryObject.h | 7 ++++--- lib/MC/ELFObjectWriter.cpp | 3 +-- lib/TableGen/Record.cpp | 10 ++-------- lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +--- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 3 +-- .../Disassembler/AArch64ExternalSymbolizer.cpp | 3 +-- .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 8 ++------ lib/Target/ARM/A15SDOptimizer.cpp | 3 +-- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 3 +-- lib/Target/Hexagon/HexagonInstrInfo.cpp | 7 +++---- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 2 +- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 14 +++++--------- lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp | 2 +- lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 2 +- .../Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 2 +- 15 files changed, 26 insertions(+), 47 deletions(-) diff --git a/include/llvm/Support/StreamableMemoryObject.h b/include/llvm/Support/StreamableMemoryObject.h index 9c9e55c0a75a..6e71ad47c8dd 100644 --- a/include/llvm/Support/StreamableMemoryObject.h +++ b/include/llvm/Support/StreamableMemoryObject.h @@ -13,6 +13,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/DataStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryObject.h" #include #include @@ -115,7 +116,7 @@ class StreamingMemoryObject : public StreamableMemoryObject { // requiring that the bitcode size be known, or otherwise ensuring that // the memory doesn't go away/get reallocated, but it's // not currently necessary. Users that need the pointer don't stream. 
- assert(0 && "getPointer in streaming memory objects not allowed"); + llvm_unreachable("getPointer in streaming memory objects not allowed"); return nullptr; } bool isValidAddress(uint64_t address) const override; @@ -154,8 +155,8 @@ class StreamingMemoryObject : public StreamableMemoryObject { kChunkSize); BytesRead += bytes; if (bytes < kChunkSize) { - if (ObjectSize && BytesRead < Pos) - assert(0 && "Unexpected short read fetching bitcode"); + assert((!ObjectSize || BytesRead >= Pos) && + "Unexpected short read fetching bitcode"); if (BytesRead <= Pos) { // reached EOF/ran out of bytes ObjectSize = BytesRead; EOFReached = true; diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index a98a13e0cd42..87f6ec0f3d13 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1574,8 +1574,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, break; default: - assert(0 && "FIXME: sh_type value not supported!"); - break; + llvm_unreachable("FIXME: sh_type value not supported!"); } if (TargetObjectWriter->getEMachine() == ELF::EM_ARM && diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index c553a21c261e..f7843dc8360b 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -811,20 +811,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { } case HEAD: { if (ListInit *LHSl = dyn_cast(LHS)) { - if (LHSl->getSize() == 0) { - assert(0 && "Empty list in car"); - return nullptr; - } + assert(LHSl->getSize() != 0 && "Empty list in car"); return LHSl->getElement(0); } break; } case TAIL: { if (ListInit *LHSl = dyn_cast(LHS)) { - if (LHSl->getSize() == 0) { - assert(0 && "Empty list in cdr"); - return nullptr; - } + assert(LHSl->getSize() != 0 && "Empty list in cdr"); // Note the +1. We can't just pass the result of getValues() // directly. 
ArrayRef::iterator begin = LHSl->getValues().begin()+1; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index e45ca4dbc0d9..bb5290208cb5 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -823,8 +823,7 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, #ifndef NDEBUG MI->dump(); #endif - assert(0 && "Unexpected instruction for custom inserter!"); - break; + llvm_unreachable("Unexpected instruction for custom inserter!"); case AArch64::F128CSEL: return EmitF128CSEL(MI, BB); @@ -833,7 +832,6 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); } - llvm_unreachable("Unexpected instruction for custom inserter!"); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f861df0bf99f..5d363a00dc0f 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3447,8 +3447,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { case Match_MnemonicFail: return Error(Loc, "unrecognized instruction mnemonic"); default: - assert(0 && "unexpected error code!"); - return Error(Loc, "invalid instruction format"); + llvm_unreachable("unexpected error code!"); } } diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 24663684a3fd..2057c51346af 100644 --- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -37,8 +37,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) { case LLVMDisassembler_VariantKind_ARM64_TLVP: case LLVMDisassembler_VariantKind_ARM64_TLVOFF: default: - 
assert(0 && "bad LLVMDisassembler_VariantKind"); - return MCSymbolRefExpr::VK_None; + llvm_unreachable("bad LLVMDisassembler_VariantKind"); } } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 464a18cdbc04..f0513575edb4 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -218,13 +218,9 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, const MCSubtargetInfo &STI) const { if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); - else { - assert(MO.isImm() && "did not expect relocated expression"); - return static_cast(MO.getImm()); - } - assert(0 && "Unable to encode MCOperand!"); - return 0; + assert(MO.isImm() && "did not expect relocated expression"); + return static_cast(MO.getImm()); } template uint32_t diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 94faf6f60db3..92eaf9e1c9b3 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -321,8 +321,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); } - assert(0 && "Unhandled update pattern!"); - return 0; + llvm_unreachable("Unhandled update pattern!"); } // Return true if this MachineInstr inserts a scalar (SPR) value into diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 5b51a52f828a..b8ee55574972 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1047,8 +1047,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, // we have a movt or a movw, but that led to misleadingly results. // This is now disallowed in the the AsmParser in validateInstruction() // so this should never happen. 
- assert(0 && "expression without :upper16: or :lower16:"); - return 0; + llvm_unreachable("expression without :upper16: or :lower16:"); } uint32_t ARMMCCodeEmitter:: diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index ea6367a89bce..1c95e06c8923 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1538,14 +1538,13 @@ int HexagonInstrInfo::GetDotOldOp(const int opc) const { int NewOp = opc; if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form NewOp = Hexagon::getPredOldOpcode(NewOp); - if (NewOp < 0) - assert(0 && "Couldn't change predicate new instruction to its old form."); + assert(NewOp >= 0 && + "Couldn't change predicate new instruction to its old form."); } if (isNewValueStore(NewOp)) { // Convert into non-new-value format NewOp = Hexagon::getNonNVStore(NewOp); - if (NewOp < 0) - assert(0 && "Couldn't change new-value store to its old form."); + assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); } return NewOp; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index bc695e6d4f74..d5c3dbc47880 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -65,7 +65,7 @@ class MipsAsmBackend : public MCAsmBackend { const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { // FIXME. 
- assert(0 && "RelaxInstruction() unimplemented"); + llvm_unreachable("RelaxInstruction() unimplemented"); return false; } diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 5e7cefed0ace..dc1344fb8d3f 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -172,17 +172,13 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, SmallVectorImpl &Fixup, const MCSubtargetInfo &STI) const { if (MO.isReg()) { - if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) { + if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) return MRI.getEncodingValue(MO.getReg()); - } else { - return getHWReg(MO.getReg()); - } - } else if (MO.isImm()) { - return MO.getImm(); - } else { - assert(0); - return 0; + return getHWReg(MO.getReg()); } + + assert(MO.isImm()); + return MO.getImm(); } #include "AMDGPUGenMCCodeEmitter.inc" diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index 261fb3838d37..5975a517994a 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -173,6 +173,6 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum, raw_ostream &O) { - assert(0 && "FIXME: Implement SparcInstPrinter::printGetPCX."); + llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX."); return true; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 0fbac218cb6b..dcd81e3d6249 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -196,7 +196,7 @@ namespace { const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { // FIXME. 
- assert(0 && "fixupNeedsRelaxation() unimplemented"); + llvm_unreachable("fixupNeedsRelaxation() unimplemented"); return false; } void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index b19ad7b45ca6..eea9626c17b0 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -133,7 +133,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, if (Expr->EvaluateAsAbsolute(Res)) return Res; - assert(0 && "Unhandled expression!"); + llvm_unreachable("Unhandled expression!"); return 0; } From e793899a078457a1b0b11657ad07978ad9c0c687 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 06:22:01 +0000 Subject: [PATCH 0021/1155] 80-column fixups. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211255 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-mc/llvm-mc.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index a6b74a7b1baa..60e566c16357 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -52,7 +52,8 @@ static cl::opt ShowEncoding("show-encoding", cl::desc("Show instruction encodings")); static cl::opt -CompressDebugSections("compress-debug-sections", cl::desc("Compress DWARF debug sections")); +CompressDebugSections("compress-debug-sections", + cl::desc("Compress DWARF debug sections")); static cl::opt ShowInst("show-inst", cl::desc("Show internal instruction representation")); @@ -240,7 +241,8 @@ static void setDwarfDebugProducer(void) { DwarfDebugProducer += getenv("DEBUG_PRODUCER"); } -static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, tool_output_file *Out) { +static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, + tool_output_file *Out) { AsmLexer Lexer(MAI); Lexer.setBuffer(SrcMgr.getMemoryBuffer(0)); 
@@ -320,7 +322,8 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, tool_output_file *Out) static int AssembleInput(const char *ProgName, const Target *TheTarget, SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, - MCAsmInfo &MAI, MCSubtargetInfo &STI, MCInstrInfo &MCII) { + MCAsmInfo &MAI, MCSubtargetInfo &STI, + MCInstrInfo &MCII) { std::unique_ptr Parser( createMCAsmParser(SrcMgr, Ctx, Str, MAI)); std::unique_ptr TAP( @@ -391,7 +394,8 @@ int main(int argc, char **argv) { if (CompressDebugSections) { if (!zlib::isAvailable()) { - errs() << ProgName << ": build tools with zlib to enable -compress-debug-sections"; + errs() << ProgName + << ": build tools with zlib to enable -compress-debug-sections"; return 1; } MAI->setCompressDebugSections(true); @@ -479,7 +483,8 @@ int main(int argc, char **argv) { Res = AsLexInput(SrcMgr, *MAI, Out.get()); break; case AC_Assemble: - Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, *MCII); + Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, + *MCII); break; case AC_MDisassemble: assert(IP && "Expected assembly output"); From 054bd5d288c9e9a480583e017dcea5df6d11af3a Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 06:22:05 +0000 Subject: [PATCH 0022/1155] Remove unnecessary include. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211256 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 996dc2122f49..d30588e5d4f1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -47,7 +47,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" From 179bb4e0eef028baf4015a12f962964739d3b542 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 06:22:08 +0000 Subject: [PATCH 0023/1155] Move -dwarf-version to an MC level command line option so it's used by all of the MC level tools and codegen. Fix up all uses in the compiler to use this and set it on the context accordingly. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211257 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCTargetOptions.h | 4 +++- include/llvm/MC/MCTargetOptionsCommandFlags.h | 4 ++++ lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 5 +---- lib/MC/MCTargetOptions.cpp | 3 ++- tools/llvm-mc/llvm-mc.cpp | 14 +++++++------- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h index 80cc8befb7a7..eb4348ed3ec2 100644 --- a/include/llvm/MC/MCTargetOptions.h +++ b/include/llvm/MC/MCTargetOptions.h @@ -29,6 +29,7 @@ class MCTargetOptions { bool ShowMCEncoding : 1; bool ShowMCInst : 1; bool AsmVerbose : 1; + int DwarfVersion; MCTargetOptions(); }; @@ -41,7 +42,8 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) { ARE_EQUAL(MCUseDwarfDirectory) && ARE_EQUAL(ShowMCEncoding) && ARE_EQUAL(ShowMCInst) && - ARE_EQUAL(AsmVerbose)); + ARE_EQUAL(AsmVerbose) && + ARE_EQUAL(DwarfVersion)); #undef ARE_EQUAL } diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.h b/include/llvm/MC/MCTargetOptionsCommandFlags.h index 17a117a2a3bd..344983dec5f2 100644 --- a/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -33,11 +33,15 @@ cl::opt RelaxAll("mc-relax-all", cl::desc("When used with filetype=obj, " "relax all fixups in the emitted object file")); +cl::opt DwarfVersion("dwarf-version", cl::desc("Dwarf version"), + cl::init(0)); + static inline MCTargetOptions InitMCTargetOptionsFromFlags() { MCTargetOptions Options; Options.SanitizeAddress = (AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress); Options.MCRelaxAll = RelaxAll; + Options.DwarfVersion = DwarfVersion; return Options; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index a88aebd2d22f..7aaf731b6f8b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ 
-98,10 +98,6 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, clEnumVal(Disable, "Disabled"), clEnumValEnd), cl::init(Default)); -static cl::opt -DwarfVersionNumber("dwarf-version", cl::Hidden, - cl::desc("Generate DWARF for dwarf version."), cl::init(0)); - static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; @@ -209,6 +205,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfPubSections = DwarfPubSections == Enable; + unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp index 8e946d57f7fb..efd724a15df6 100644 --- a/lib/MC/MCTargetOptions.cpp +++ b/lib/MC/MCTargetOptions.cpp @@ -14,6 +14,7 @@ namespace llvm { MCTargetOptions::MCTargetOptions() : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false), MCSaveTempLabels(false), MCUseDwarfDirectory(false), - ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false) {} + ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false), + DwarfVersion(0) {} } // end namespace llvm diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index 60e566c16357..31b3d295b41f 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -150,9 +150,6 @@ static cl::opt GenDwarfForAssembly("g", cl::desc("Generate dwarf debugging info for assembly " "source files")); -static cl::opt -DwarfVersion("dwarf-version", cl::desc("Dwarf version"), cl::init(4)); - static cl::opt DebugCompilationDir("fdebug-compilation-dir", cl::desc("Specifies the debug info's compilation dir")); @@ -323,12 +320,12 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, static int AssembleInput(const char *ProgName, const Target *TheTarget, SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, MCAsmInfo &MAI, MCSubtargetInfo &STI, - MCInstrInfo &MCII) { 
+ MCInstrInfo &MCII, MCTargetOptions &MCOptions) { std::unique_ptr Parser( createMCAsmParser(SrcMgr, Ctx, Str, MAI)); std::unique_ptr TAP( - TheTarget->createMCAsmParser(STI, *Parser, MCII, - InitMCTargetOptionsFromFlags())); + TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); + if (!TAP) { errs() << ProgName << ": error: this target does not support assembly parsing.\n"; @@ -359,6 +356,7 @@ int main(int argc, char **argv) { cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); TripleName = Triple::normalize(TripleName); setDwarfDebugFlags(argc, argv); @@ -411,6 +409,8 @@ int main(int argc, char **argv) { Ctx.setAllowTemporaryLabels(false); Ctx.setGenDwarfForAssembly(GenDwarfForAssembly); + // Default to 4 for dwarf version. + unsigned DwarfVersion = MCOptions.DwarfVersion ? MCOptions.DwarfVersion : 4; if (DwarfVersion < 2 || DwarfVersion > 4) { errs() << ProgName << ": Dwarf version " << DwarfVersion << " is not supported." << '\n'; @@ -484,7 +484,7 @@ int main(int argc, char **argv) { break; case AC_Assemble: Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, - *MCII); + *MCII, MCOptions); break; case AC_MDisassemble: assert(IP && "Expected assembly output"); From 6c2e8874b0f1fd993a17ad27c8348ae531cd5464 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 19 Jun 2014 07:14:33 +0000 Subject: [PATCH 0024/1155] InstCombine: Stop two transforms dueling InstCombineMulDivRem has: // Canonicalize (X+C1)*CI -> X*CI+C1*CI. InstCombineAddSub has: // W*X + Y*Z --> W * (X+Z) iff W == Y These two transforms could fight with each other if C1*CI would not fold away to something simpler than a ConstantExpr mul. The InstCombineMulDivRem transform only acted on ConstantInts until r199602 when it was changed to operate on all Constants in order to let it fire on ConstantVectors. 
To fix this, make this transform more careful by checking to see if we actually folded away C1*CI. This fixes PR20079. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 7 +++++-- test/Transforms/InstCombine/pr20079.ll | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/InstCombine/pr20079.ll diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 9996ebc2e744..497c0b49ab3f 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -203,8 +203,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *X; Constant *C1; if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) { - Value *Add = Builder->CreateMul(X, Op1); - return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, Op1)); + Value *Mul = Builder->CreateMul(C1, Op1); + // Only go forward with the transform if C1*CI simplifies to a tidier + // constant. 
+ if (!match(Mul, m_Mul(m_Value(), m_Value()))) + return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul); } } } diff --git a/test/Transforms/InstCombine/pr20079.ll b/test/Transforms/InstCombine/pr20079.ll new file mode 100644 index 000000000000..3c86ecc5f30b --- /dev/null +++ b/test/Transforms/InstCombine/pr20079.ll @@ -0,0 +1,13 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s +@b = internal global [1 x i32] zeroinitializer, align 4 +@c = internal global i32 0, align 4 + +; CHECK-LABEL: @fn1 +; CHECK: [[ADD:%.*]] = add i32 %a, -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], sub (i32 0, i32 zext (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @b, i64 0, i64 0), i32* @c) to i32)) +; CHECK-NEXT: ret i32 [[AND]] +define i32 @fn1(i32 %a) { + %xor = add i32 %a, -1 + %mul = mul nsw i32 %xor, zext (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @b, i64 0, i64 0), i32* @c) to i32) + ret i32 %mul +} From ba928e254e78a06b9e4743dd933df3103c80ed92 Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 07:25:18 +0000 Subject: [PATCH 0025/1155] Remove OwningPtr.h and associated tests llvm::OwningPtr is superseded by std::unique_ptr. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211259 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/OwningPtr.h | 165 ------------------- unittests/ADT/CMakeLists.txt | 1 - unittests/ADT/OwningPtrTest.cpp | 273 -------------------------------- utils/llvm.natvis | 8 - 4 files changed, 447 deletions(-) delete mode 100644 include/llvm/ADT/OwningPtr.h delete mode 100644 unittests/ADT/OwningPtrTest.cpp diff --git a/include/llvm/ADT/OwningPtr.h b/include/llvm/ADT/OwningPtr.h deleted file mode 100644 index 5e83358fc071..000000000000 --- a/include/llvm/ADT/OwningPtr.h +++ /dev/null @@ -1,165 +0,0 @@ -//===- llvm/ADT/OwningPtr.h - Smart ptr that owns the pointee ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines and implements the OwningPtr class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADT_OWNINGPTR_H -#define LLVM_ADT_OWNINGPTR_H - -#include "llvm/Support/Compiler.h" -#include -#include -#include - -namespace llvm { - -/// OwningPtr smart pointer - OwningPtr mimics a built-in pointer except that it -/// guarantees deletion of the object pointed to, either on destruction of the -/// OwningPtr or via an explicit reset(). Once created, ownership of the -/// pointee object can be taken away from OwningPtr by using the take method. 
-template -class OwningPtr { - OwningPtr(OwningPtr const &) LLVM_DELETED_FUNCTION; - OwningPtr &operator=(OwningPtr const &) LLVM_DELETED_FUNCTION; - T *Ptr; -public: - explicit OwningPtr(T *P = 0) : Ptr(P) {} - - OwningPtr(OwningPtr &&Other) : Ptr(Other.take()) {} - - OwningPtr &operator=(OwningPtr &&Other) { - reset(Other.take()); - return *this; - } - - OwningPtr(std::unique_ptr Other) : Ptr(Other.release()) {} - - OwningPtr &operator=(std::unique_ptr Other) { - reset(Other.release()); - return *this; - } - -#if LLVM_HAS_RVALUE_REFERENCE_THIS - operator std::unique_ptr() && { return std::unique_ptr(take()); } -#endif - - ~OwningPtr() { - delete Ptr; - } - - /// reset - Change the current pointee to the specified pointer. Note that - /// calling this with any pointer (including a null pointer) deletes the - /// current pointer. - void reset(T *P = 0) { - if (P == Ptr) return; - T *Tmp = Ptr; - Ptr = P; - delete Tmp; - } - - /// take - Reset the owning pointer to null and return its pointer. This does - /// not delete the pointer before returning it. - T *take() { - T *Tmp = Ptr; - Ptr = nullptr; - return Tmp; - } - - T *release() { return take(); } - - std::unique_ptr take_unique() { return std::unique_ptr(take()); } - - T &operator*() const { - assert(Ptr && "Cannot dereference null pointer"); - return *Ptr; - } - - T *operator->() const { return Ptr; } - T *get() const { return Ptr; } - LLVM_EXPLICIT operator bool() const { return Ptr != nullptr; } - bool operator!() const { return Ptr == nullptr; } - bool isValid() const { return Ptr != nullptr; } - - void swap(OwningPtr &RHS) { - T *Tmp = RHS.Ptr; - RHS.Ptr = Ptr; - Ptr = Tmp; - } -}; - -template -inline void swap(OwningPtr &a, OwningPtr &b) { - a.swap(b); -} - -/// OwningArrayPtr smart pointer - OwningArrayPtr provides the same -/// functionality as OwningPtr, except that it works for array types. 
-template -class OwningArrayPtr { - OwningArrayPtr(OwningArrayPtr const &) LLVM_DELETED_FUNCTION; - OwningArrayPtr &operator=(OwningArrayPtr const &) LLVM_DELETED_FUNCTION; - T *Ptr; -public: - explicit OwningArrayPtr(T *P = 0) : Ptr(P) {} - - OwningArrayPtr(OwningArrayPtr &&Other) : Ptr(Other.take()) {} - - OwningArrayPtr &operator=(OwningArrayPtr &&Other) { - reset(Other.take()); - return *this; - } - - ~OwningArrayPtr() { - delete [] Ptr; - } - - /// reset - Change the current pointee to the specified pointer. Note that - /// calling this with any pointer (including a null pointer) deletes the - /// current pointer. - void reset(T *P = 0) { - if (P == Ptr) return; - T *Tmp = Ptr; - Ptr = P; - delete [] Tmp; - } - - /// take - Reset the owning pointer to null and return its pointer. This does - /// not delete the pointer before returning it. - T *take() { - T *Tmp = Ptr; - Ptr = 0; - return Tmp; - } - - T &operator[](std::ptrdiff_t i) const { - assert(Ptr && "Cannot dereference null pointer"); - return Ptr[i]; - } - - T *get() const { return Ptr; } - LLVM_EXPLICIT operator bool() const { return Ptr != 0; } - bool operator!() const { return Ptr == nullptr; } - - void swap(OwningArrayPtr &RHS) { - T *Tmp = RHS.Ptr; - RHS.Ptr = Ptr; - Ptr = Tmp; - } -}; - -template -inline void swap(OwningArrayPtr &a, OwningArrayPtr &b) { - a.swap(b); -} - -} // end namespace llvm - -#endif diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt index 51197231f2b5..0f214f3d0c80 100644 --- a/unittests/ADT/CMakeLists.txt +++ b/unittests/ADT/CMakeLists.txt @@ -23,7 +23,6 @@ set(ADTSources MakeUniqueTest.cpp MapVectorTest.cpp OptionalTest.cpp - OwningPtrTest.cpp PackedVectorTest.cpp PointerIntPairTest.cpp PointerUnionTest.cpp diff --git a/unittests/ADT/OwningPtrTest.cpp b/unittests/ADT/OwningPtrTest.cpp deleted file mode 100644 index d83a9471cc56..000000000000 --- a/unittests/ADT/OwningPtrTest.cpp +++ /dev/null @@ -1,273 +0,0 @@ -//===- 
llvm/unittest/ADT/OwningPtrTest.cpp - OwningPtr unit tests ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/OwningPtr.h" -#include "gtest/gtest.h" -using namespace llvm; - -namespace { - -struct TrackDestructor { - static unsigned Destructions; - int val; - explicit TrackDestructor(int val) : val(val) {} - ~TrackDestructor() { ++Destructions; } - static void ResetCounts() { Destructions = 0; } - -private: - TrackDestructor(const TrackDestructor &other) LLVM_DELETED_FUNCTION; - TrackDestructor & - operator=(const TrackDestructor &other) LLVM_DELETED_FUNCTION; - TrackDestructor(TrackDestructor &&other) LLVM_DELETED_FUNCTION; - TrackDestructor &operator=(TrackDestructor &&other) LLVM_DELETED_FUNCTION; -}; - -unsigned TrackDestructor::Destructions = 0; - -// Test fixture -class OwningPtrTest : public testing::Test {}; - -TEST_F(OwningPtrTest, DefaultConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr O; - EXPECT_FALSE(O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - } - EXPECT_EQ(0u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, PtrConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr O(new TrackDestructor(3)); - EXPECT_TRUE((bool)O); - EXPECT_FALSE(!O); - EXPECT_TRUE(O.get()); - EXPECT_TRUE(O.isValid()); - EXPECT_EQ(3, (*O).val); - EXPECT_EQ(3, O->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Reset) { - TrackDestructor::ResetCounts(); - OwningPtr O(new TrackDestructor(3)); - EXPECT_EQ(0u, TrackDestructor::Destructions); - O.reset(); - EXPECT_FALSE((bool)O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - 
-TEST_F(OwningPtrTest, Take) { - TrackDestructor::ResetCounts(); - TrackDestructor *T = nullptr; - { - OwningPtr O(new TrackDestructor(3)); - T = O.take(); - EXPECT_FALSE((bool)O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - EXPECT_TRUE(T); - EXPECT_EQ(3, T->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - delete T; - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Release) { - TrackDestructor::ResetCounts(); - TrackDestructor *T = nullptr; - { - OwningPtr O(new TrackDestructor(3)); - T = O.release(); - EXPECT_FALSE((bool)O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - EXPECT_TRUE(T); - EXPECT_EQ(3, T->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - delete T; - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, MoveConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B = std::move(A); - EXPECT_FALSE((bool)A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, MoveAssignment) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(1u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Swap) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - 
B.swap(A); - EXPECT_TRUE((bool)A); - EXPECT_FALSE(!A); - EXPECT_TRUE(A.get()); - EXPECT_TRUE(A.isValid()); - EXPECT_EQ(4, (*A).val); - EXPECT_EQ(4, A->val); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - swap(A, B); - EXPECT_TRUE((bool)A); - EXPECT_FALSE(!A); - EXPECT_TRUE(A.get()); - EXPECT_TRUE(A.isValid()); - EXPECT_EQ(4, (*A).val); - EXPECT_EQ(4, A->val); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, UniqueToOwningConstruction) { - TrackDestructor::ResetCounts(); - { - std::unique_ptr A(new TrackDestructor(3)); - OwningPtr B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, UniqueToOwningAssignment) { - TrackDestructor::ResetCounts(); - { - std::unique_ptr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(1u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, TakeUniqueConstruction) { - TrackDestructor::ResetCounts(); 
- { - OwningPtr A(new TrackDestructor(3)); - std::unique_ptr B = A.take_unique(); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -#if LLVM_HAS_RVALUE_REFERENCE_THIS -TEST_F(OwningPtrTest, OwningToUniqueConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - std::unique_ptr B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} -#endif -} diff --git a/utils/llvm.natvis b/utils/llvm.natvis index 9874ce58d250..6da0b2512a92 100644 --- a/utils/llvm.natvis +++ b/utils/llvm.natvis @@ -108,14 +108,6 @@ or create a symbolic link so it updates automatically. - - empty - OwningPtr {*Ptr} - - Ptr - - - {{ [Small Mode] size={NumElements}, capacity={CurArraySize} }} {{ [Big Mode] size={NumElements}, capacity={CurArraySize} }} From c28beb254dabd5f522bafdbdd59a4e22f0a539ec Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 07:25:25 +0000 Subject: [PATCH 0026/1155] CommandLine: bail out when options get multiply registered These errors are strictly unrecoverable and indicate serious issues such as conflicting option names or an incorrectly linked LLVM distribution. With this change, the errors actually get detected so tests don't pass silently. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211260 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/CommandLine.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index e09d4b6e47b4..c2b739fa7360 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -145,6 +145,7 @@ void OptionCategory::registerCategory() { static void GetOptionInfo(SmallVectorImpl &PositionalOpts, SmallVectorImpl &SinkOpts, StringMap &OptionsMap) { + bool HadErrors = false; SmallVector OptionNames; Option *CAOpt = nullptr; // The ConsumeAfter option if it exists. for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) { @@ -158,8 +159,9 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { // Add argument to the argument map! if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) { - errs() << ProgramName << ": CommandLine Error: Argument '" - << OptionNames[i] << "' defined more than once!\n"; + errs() << ProgramName << ": CommandLine Error: Option '" + << OptionNames[i] << "' registered more than once!\n"; + HadErrors = true; } } @@ -171,8 +173,10 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, else if (O->getMiscFlags() & cl::Sink) // Remember sink options SinkOpts.push_back(O); else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) { - if (CAOpt) + if (CAOpt) { O->error("Cannot specify more than one option with cl::ConsumeAfter!"); + HadErrors = true; + } CAOpt = O; } } @@ -182,6 +186,12 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, // Make sure that they are in order of registration not backwards. std::reverse(PositionalOpts.begin(), PositionalOpts.end()); + + // Fail hard if there were errors. 
These are strictly unrecoverable and + // indicate serious issues such as conflicting option names or an incorrectly + // linked LLVM distribution. + if (HadErrors) + report_fatal_error("inconsistency in registered CommandLine options"); } From 83175090522ebd6513e45033c342200cd645f89c Mon Sep 17 00:00:00 2001 From: Dinesh Dwivedi Date: Thu, 19 Jun 2014 08:29:18 +0000 Subject: [PATCH 0027/1155] Refactored and updated SimplifyUsingDistributiveLaws() to * Find factorization opportunities using identity values. * Find factorization opportunities by treating shl(X, C) as mul (X, shl(C)) * Keep NSW flag while simplifying instruction using factorization. This fixes PR19263. Differential Revision: http://reviews.llvm.org/D3799 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211261 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAddSub.cpp | 52 ----- .../InstCombine/InstructionCombining.cpp | 195 +++++++++++++----- test/Transforms/InstCombine/add2.ll | 68 ++++++ 3 files changed, 210 insertions(+), 105 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index f6658964ae85..77b2404b49d4 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -865,30 +865,6 @@ Value *FAddCombine::createAddendVal return createFMul(OpndVal, Coeff.getValue(Instr->getType())); } -// dyn_castFoldableMul - If this value is a multiply that can be folded into -// other computations (because it has a constant operand), return the -// non-constant operand of the multiply, and set CST to point to the multiplier. -// Otherwise, return null. 
-// -static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { - if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy()) - return nullptr; - - Instruction *I = dyn_cast(V); - if (!I) return nullptr; - - if (I->getOpcode() == Instruction::Mul) - if ((CST = dyn_cast(I->getOperand(1)))) - return I->getOperand(0); - if (I->getOpcode() == Instruction::Shl) - if ((CST = dyn_cast(I->getOperand(1)))) { - // The multiplier is really 1 << CST. - CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST); - return I->getOperand(0); - } - return nullptr; -} - // If one of the operands only has one non-zero bit, and if the other // operand has a known-zero bit in a more significant place than it (not // including the sign bit) the ripple may go up to and fill the zero, but @@ -1089,24 +1065,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = dyn_castNegVal(RHS)) return BinaryOperator::CreateSub(LHS, V); - - { - Constant *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2)) { - if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2)); - - // X*C1 + X*C2 --> X * (C1+C2) - Constant *C1; - if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); - } - - // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2)); - } - // A+B --> A|B iff A and B have no bits set in common. 
if (IntegerType *IT = dyn_cast(I.getType())) { APInt LHSKnownOne(IT->getBitWidth(), 0); @@ -1593,16 +1551,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } } - Constant *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1)) { - if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1)); - - Constant *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); - } - // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". if (DL) { diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 991ad796a7e3..88d7a0d59eac 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -395,6 +395,127 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, return false; } +/// This function returns identity value for given opcode, which can be used to +/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1). +static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) { + if (isa(V)) + return nullptr; + + if (OpCode == Instruction::Mul) + return ConstantInt::get(V->getType(), 1); + + // TODO: We can handle other cases e.g. Instruction::And, Instruction::Or etc. + + return nullptr; +} + +/// This function factors binary ops which can be combined using distributive +/// laws. This also factor SHL as MUL e.g. SHL(X, 2) ==> MUL(X, 4). +Instruction::BinaryOps getBinOpsForFactorization(BinaryOperator *Op, + Value *&LHS, Value *&RHS) { + if (!Op) + return Instruction::BinaryOpsEnd; + + if (Op->getOpcode() == Instruction::Shl) { + if (Constant *CST = dyn_cast(Op->getOperand(1))) { + // The multiplier is really 1 << CST. 
+ RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST); + LHS = Op->getOperand(0); + return Instruction::Mul; + } + } + + // TODO: We can add other conversions e.g. shr => div etc. + + LHS = Op->getOperand(0); + RHS = Op->getOperand(1); + return Op->getOpcode(); +} + +/// This tries to simplify binary operations by factorizing out common terms +/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). +static Value *tryFactorization(InstCombiner::BuilderTy *Builder, + const DataLayout *DL, BinaryOperator &I, + Instruction::BinaryOps InnerOpcode, Value *A, + Value *B, Value *C, Value *D) { + + // If any of A, B, C, D are null, we can not factor I, return early. + // Checking A and C should be enough. + if (!A || !C || !B || !D) + return nullptr; + + Value *SimplifiedInst = nullptr; + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); + + // Does "X op' Y" always equal "Y op' X"? + bool InnerCommutative = Instruction::isCommutative(InnerOpcode); + + // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? + if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (InnerCommutative && A == D)) { + if (A != C) + std::swap(C, D); + // Consider forming "A op' (B op D)". + // If "B op D" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); + // If "B op D" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); + if (V) { + SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V); + } + } + + // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? 
+ if (!SimplifiedInst && RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (InnerCommutative && B == C)) { + if (B != D) + std::swap(C, D); + // Consider forming "(A op C) op' B". + // If "A op C" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); + + // If "A op C" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); + if (V) { + SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B); + } + } + + if (SimplifiedInst) { + ++NumFactor; + SimplifiedInst->takeName(&I); + + // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag. + // TODO: Check for NUW. + if (BinaryOperator *BO = dyn_cast(SimplifiedInst)) { + if (isa(SimplifiedInst)) { + bool HasNSW = false; + if (isa(&I)) + HasNSW = I.hasNoSignedWrap(); + + if (BinaryOperator *Op0 = dyn_cast(LHS)) + if (isa(Op0)) + HasNSW &= Op0->hasNoSignedWrap(); + + if (BinaryOperator *Op1 = dyn_cast(RHS)) + if (isa(Op1)) + HasNSW &= Op1->hasNoSignedWrap(); + BO->setHasNoSignedWrap(HasNSW); + } + } + } + return SimplifiedInst; +} + /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations /// which some other binary operation distributes over either by factorizing /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this @@ -404,65 +525,33 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); - Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op // Factorization. 
- if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) { - // The instruction has the form "(A op' B) op (C op' D)". Try to factorize - // a common term. - Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); - Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); - Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; + Instruction::BinaryOps LHSOpcode = getBinOpsForFactorization(Op0, A, B); + Instruction::BinaryOps RHSOpcode = getBinOpsForFactorization(Op1, C, D); + + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + if (LHSOpcode == RHSOpcode) { + if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) + return V; + } - // Does "X op' Y" always equal "Y op' X"? - bool InnerCommutative = Instruction::isCommutative(InnerOpcode); - - // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? - if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) - // Does the instruction have the form "(A op' B) op (A op' D)" or, in the - // commutative case, "(A op' B) op (C op' A)"? - if (A == C || (InnerCommutative && A == D)) { - if (A != C) - std::swap(C, D); - // Consider forming "A op' (B op D)". - // If "B op D" simplifies then it can be formed with no cost. - Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); - // If "B op D" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, A, V); - V->takeName(&I); - return V; - } - } + // The instruction has the form "(A op' B) op (C)". Try to factorize common + // term. 
+ if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, + getIdentityValue(LHSOpcode, RHS))) + return V; - // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? - if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) - // Does the instruction have the form "(A op' B) op (C op' B)" or, in the - // commutative case, "(A op' B) op (B op' D)"? - if (B == D || (InnerCommutative && B == C)) { - if (B != D) - std::swap(C, D); - // Consider forming "(A op C) op' B". - // If "A op C" simplifies then it can be formed with no cost. - Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); - // If "A op C" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, V, B); - V->takeName(&I); - return V; - } - } - } + // The instruction has the form "(B) op (C op' D)". Try to factorize common + // term. + if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, + getIdentityValue(RHSOpcode, LHS), C, D)) + return V; // Expansion. + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { // The instruction has the form "(A op' B) op C". See if expanding it out // to "(A op C) op' (B op C)" results in simplifications. 
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll index aaf3a7a235bf..c728b4437af4 100644 --- a/test/Transforms/InstCombine/add2.ll +++ b/test/Transforms/InstCombine/add2.ll @@ -86,3 +86,71 @@ define i16 @test9(i16 %a) { ; CHECK-NEXT: %d = mul i16 %a, -32767 ; CHECK-NEXT: ret i16 %d } + +define i16 @add_nsw_mul_nsw(i16 %x) { + %add1 = add nsw i16 %x, %x + %add2 = add nsw i16 %add1, %x + ret i16 %add2 +; CHECK-LABEL: @add_nsw_mul_nsw( +; CHECK-NEXT: %add2 = mul nsw i16 %x, 3 +; CHECK-NEXT: ret i16 %add2 +} + +define i16 @mul_add_to_mul_1(i16 %x) { + %mul1 = mul nsw i16 %x, 8 + %add2 = add nsw i16 %x, %mul1 + ret i16 %add2 +; CHECK-LABEL: @mul_add_to_mul_1( +; CHECK-NEXT: %add2 = mul nsw i16 %x, 9 +; CHECK-NEXT: ret i16 %add2 +} + +define i16 @mul_add_to_mul_2(i16 %x) { + %mul1 = mul nsw i16 %x, 8 + %add2 = add nsw i16 %mul1, %x + ret i16 %add2 +; CHECK-LABEL: @mul_add_to_mul_2( +; CHECK-NEXT: %add2 = mul nsw i16 %x, 9 +; CHECK-NEXT: ret i16 %add2 +} + +define i16 @mul_add_to_mul_3(i16 %a) { + %mul1 = mul i16 %a, 2 + %mul2 = mul i16 %a, 3 + %add = add nsw i16 %mul1, %mul2 + ret i16 %add +; CHECK-LABEL: @mul_add_to_mul_3( +; CHECK-NEXT: %add = mul i16 %a, 5 +; CHECK-NEXT: ret i16 %add +} + +define i16 @mul_add_to_mul_4(i16 %a) { + %mul1 = mul nsw i16 %a, 2 + %mul2 = mul nsw i16 %a, 7 + %add = add nsw i16 %mul1, %mul2 + ret i16 %add +; CHECK-LABEL: @mul_add_to_mul_4( +; CHECK-NEXT: %add = mul nsw i16 %a, 9 +; CHECK-NEXT: ret i16 %add +} + +define i16 @mul_add_to_mul_5(i16 %a) { + %mul1 = mul nsw i16 %a, 3 + %mul2 = mul nsw i16 %a, 7 + %add = add nsw i16 %mul1, %mul2 + ret i16 %add +; CHECK-LABEL: @mul_add_to_mul_5( +; CHECK-NEXT: %add = mul nsw i16 %a, 10 +; CHECK-NEXT: ret i16 %add +} + +define i32 @mul_add_to_mul_6(i32 %x, i32 %y) { + %mul1 = mul nsw i32 %x, %y + %mul2 = mul nsw i32 %mul1, 5 + %add = add nsw i32 %mul1, %mul2 + ret i32 %add +; CHECK-LABEL: @mul_add_to_mul_6( +; CHECK-NEXT: %mul1 = mul nsw i32 %x, %y +; CHECK-NEXT: 
%add = mul nsw i32 %mul1, 6 +; CHECK-NEXT: ret i32 %add +} From cfdf8052865b01e8b8d321640c3f51ff938cc3c4 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 19 Jun 2014 10:29:41 +0000 Subject: [PATCH 0028/1155] [X86] Teach how to combine horizontal binop even in the presence of undefs. Before this change, the backend was unable to fold a build_vector dag node with UNDEF operands into a single horizontal add/sub. This patch teaches how to combine a build_vector with UNDEF operands into a horizontal add/sub when possible. The algorithm conservatively avoids to combine a build_vector with only a single non-UNDEF operand. Added test haddsub-undef.ll to verify that we correctly fold horizontal binop even in the presence of UNDEFs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211265 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 155 ++++++++++---- test/CodeGen/X86/haddsub-undef.ll | 325 +++++++++++++++++++++++++++++ 2 files changed, 440 insertions(+), 40 deletions(-) create mode 100644 test/CodeGen/X86/haddsub-undef.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 851607eac96e..a7b6e707816f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6077,21 +6077,35 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { /// This function only analyzes elements of \p N whose indices are /// in range [BaseIdx, LastIdx). 
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, + SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1) { + EVT VT = N->getValueType(0); + assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); - assert(N->getValueType(0).isVector() && - N->getValueType(0).getVectorNumElements() >= LastIdx && + assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx && "Invalid Vector in input!"); bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD); bool CanFold = true; unsigned ExpectedVExtractIdx = BaseIdx; unsigned NumElts = LastIdx - BaseIdx; + V0 = DAG.getUNDEF(VT); + V1 = DAG.getUNDEF(VT); // Check if N implements a horizontal binop. for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) { SDValue Op = N->getOperand(i + BaseIdx); + + // Skip UNDEFs. + if (Op->getOpcode() == ISD::UNDEF) { + // Update the expected vector extract index. + if (i * 2 == NumElts) + ExpectedVExtractIdx = BaseIdx; + ExpectedVExtractIdx += 2; + continue; + } + CanFold = Op->getOpcode() == Opcode && Op->hasOneUse(); if (!CanFold) @@ -6112,12 +6126,15 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, unsigned I0 = cast(Op0.getOperand(1))->getZExtValue(); unsigned I1 = cast(Op1.getOperand(1))->getZExtValue(); - - if (i == 0) - V0 = Op0.getOperand(0); - else if (i * 2 == NumElts) { - V1 = Op0.getOperand(0); - ExpectedVExtractIdx = BaseIdx; + + if (i * 2 < NumElts) { + if (V0.getOpcode() == ISD::UNDEF) + V0 = Op0.getOperand(0); + } else { + if (V1.getOpcode() == ISD::UNDEF) + V1 = Op0.getOperand(0); + if (i * 2 == NumElts) + ExpectedVExtractIdx = BaseIdx; } SDValue Expected = (i * 2 < NumElts) ? V0 : V1; @@ -6163,9 +6180,14 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, /// Example: /// HADD V0_LO, V1_LO /// HADD V0_HI, V1_HI +/// +/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower +/// 128-bits of the result. 
If \p isUndefHI is set, then UNDEF is propagated to +/// the upper 128-bits of the result. static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, SDLoc DL, SelectionDAG &DAG, - unsigned X86Opcode, bool Mode) { + unsigned X86Opcode, bool Mode, + bool isUndefLO, bool isUndefHI) { EVT VT = V0.getValueType(); assert(VT.is256BitVector() && VT == V1.getValueType() && "Invalid nodes in input!"); @@ -6177,13 +6199,24 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL); EVT NewVT = V0_LO.getValueType(); - SDValue LO, HI; + SDValue LO = DAG.getUNDEF(NewVT); + SDValue HI = DAG.getUNDEF(NewVT); + if (Mode) { - LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); - HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); + // Don't emit a horizontal binop if the result is expected to be UNDEF. + if (!isUndefLO && V0->getOpcode() != ISD::UNDEF) + LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); + if (!isUndefHI && V1->getOpcode() != ISD::UNDEF) + HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); } else { - LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); - HI = DAG.getNode(X86Opcode, DL, NewVT, V1_HI, V1_HI); + // Don't emit a horizontal binop if the result is expected to be UNDEF. + if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF || + V1_LO->getOpcode() != ISD::UNDEF)) + LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); + + if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF || + V1_HI->getOpcode() != ISD::UNDEF)) + HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); @@ -6198,19 +6231,37 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, SDValue InVec0, InVec1; // Try to match horizontal ADD/SUB. + unsigned NumUndefsLO = 0; + unsigned NumUndefsHI = 0; + unsigned Half = NumElts/2; + + // Count the number of UNDEF operands in the build_vector in input. 
+ for (unsigned i = 0, e = Half; i != e; ++i) + if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) + NumUndefsLO++; + + for (unsigned i = Half, e = NumElts; i != e; ++i) + if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) + NumUndefsHI++; + + // Early exit if this is either a build_vector of all UNDEFs or all the + // operands but one are UNDEF. + if (NumUndefsLO + NumUndefsHI + 1 >= NumElts) + return SDValue(); + if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) { // Try to match an SSE3 float HADD/HSUB. - if (isHorizontalBinOp(BV, ISD::FADD, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); - if (isHorizontalBinOp(BV, ISD::FSUB, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) { // Try to match an SSSE3 integer HADD/HSUB. - if (isHorizontalBinOp(BV, ISD::ADD, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1); - if (isHorizontalBinOp(BV, ISD::SUB, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1); } @@ -6221,16 +6272,20 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, // Try to match an AVX horizontal add/sub of packed single/double // precision floating point values from 256-bit vectors. 
SDValue InVec2, InVec3; - if (isHorizontalBinOp(BV, ISD::FADD, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::FADD, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); - if (isHorizontalBinOp(BV, ISD::FSUB, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::FSUB, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); } else if (VT == MVT::v8i32 || VT == MVT::v16i16) { // Try to match an AVX2 horizontal add/sub of signed integers. 
@@ -6238,15 +6293,19 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, unsigned X86Opcode; bool CanFold = true; - if (isHorizontalBinOp(BV, ISD::ADD, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::ADD, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOp(BV, ISD::SUB, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::SUB, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) X86Opcode = X86ISD::HSUB; else CanFold = false; @@ -6257,29 +6316,45 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, if (Subtarget->hasAVX2()) return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1); + // Do not try to expand this build_vector into a pair of horizontal + // add/sub if we can emit a pair of scalar add/sub. + if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) + return SDValue(); + // Convert this build_vector into a pair of horizontal binop followed by // a concat vector. 
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false); + bool isUndefLO = NumUndefsLO == Half; + bool isUndefHI = NumUndefsHI == Half; + return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false, + isUndefLO, isUndefHI); } } if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || VT == MVT::v16i16) && Subtarget->hasAVX()) { unsigned X86Opcode; - if (isHorizontalBinOp(BV, ISD::ADD, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOp(BV, ISD::SUB, 0, NumElts, InVec0, InVec1)) + else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HSUB; - else if (isHorizontalBinOp(BV, ISD::FADD, 0, NumElts, InVec0, InVec1)) + else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHADD; - else if (isHorizontalBinOp(BV, ISD::FSUB, 0, NumElts, InVec0, InVec1)) + else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHSUB; else return SDValue(); + // Don't try to expand this build_vector into a pair of horizontal add/sub + // if we can simply emit a pair of scalar add/sub. + if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) + return SDValue(); + // Convert this build_vector into two horizontal add/sub followed by // a concat vector. 
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true); + bool isUndefLO = NumUndefsLO == Half; + bool isUndefHI = NumUndefsHI == Half; + return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true, + isUndefLO, isUndefHI); } return SDValue(); diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll new file mode 100644 index 000000000000..954a9d994e61 --- /dev/null +++ b/test/CodeGen/X86/haddsub-undef.ll @@ -0,0 +1,325 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX +; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 + +; Verify that we correctly fold horizontal binop even in the presence of UNDEFs. + +define <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + %vecext10 = extractelement <4 x float> %b, i32 2 + %vecext11 = extractelement <4 x float> %b, i32 3 + %add12 = fadd float %vecext10, %vecext11 + %vecinit13 = insertelement <4 x float> %vecinit5, float %add12, i32 3 + ret <4 x float> %vecinit13 +} +; CHECK-LABEL: test1_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test2_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext6 = 
extractelement <4 x float> %b, i32 0 + %vecext7 = extractelement <4 x float> %b, i32 1 + %add8 = fadd float %vecext6, %vecext7 + %vecinit9 = insertelement <4 x float> %vecinit, float %add8, i32 2 + %vecext10 = extractelement <4 x float> %b, i32 2 + %vecext11 = extractelement <4 x float> %b, i32 3 + %add12 = fadd float %vecext10, %vecext11 + %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3 + ret <4 x float> %vecinit13 +} +; CHECK-LABEL: test2_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test3_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + %vecext6 = extractelement <4 x float> %b, i32 0 + %vecext7 = extractelement <4 x float> %b, i32 1 + %add8 = fadd float %vecext6, %vecext7 + %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2 + ret <4 x float> %vecinit9 +} +; CHECK-LABEL: test3_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + ret <4 x float> %vecinit +} +; CHECK-LABEL: test4_undef +; CHECK-NOT: haddps +; CHECK: ret + + +define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) { + %vecext = extractelement <2 x double> %a, i32 0 + %vecext1 = extractelement <2 x double> %a, i32 1 + %add = fadd double %vecext, %vecext1 + %vecinit = insertelement <2 x double> undef, double %add, i32 
0 + ret <2 x double> %vecinit +} +; CHECK-LABEL: test5_undef +; CHECK-NOT: haddpd +; CHECK: ret + + +define <4 x float> @test6_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test6_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test7_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %b, i32 0 + %vecext1 = extractelement <4 x float> %b, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 2 + %vecext2 = extractelement <4 x float> %b, i32 2 + %vecext3 = extractelement <4 x float> %b, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test7_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 2 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test8_undef +; CHECK-NOT: haddps +; CHECK: ret + + +define <4 x float> @test9_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, 
i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %b, i32 2 + %vecext3 = extractelement <4 x float> %b, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test9_undef +; CHECK: haddps +; CHECK-NEXT: ret + +define <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <8 x float> undef, float %add, i32 0 + %vecext2 = extractelement <8 x float> %b, i32 2 + %vecext3 = extractelement <8 x float> %b, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 3 + ret <8 x float> %vecinit5 +} +; CHECK-LABEL: test10_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x float> @test11_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <8 x float> undef, float %add, i32 0 + %vecext2 = extractelement <8 x float> %b, i32 4 + %vecext3 = extractelement <8 x float> %b, i32 5 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 6 + ret <8 x float> %vecinit5 +} +; CHECK-LABEL: test11_undef +; SSE-NOT: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK: ret + +define <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <8 x float> undef, float %add, i32 0 + %vecext2 = extractelement <8 x 
float> %a, i32 2 + %vecext3 = extractelement <8 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1 + ret <8 x float> %vecinit5 +} +; CHECK-LABEL: test12_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x float> @test13_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add1 = fadd float %vecext, %vecext1 + %vecinit1 = insertelement <8 x float> undef, float %add1, i32 0 + %vecext2 = extractelement <8 x float> %a, i32 2 + %vecext3 = extractelement <8 x float> %a, i32 3 + %add2 = fadd float %vecext2, %vecext3 + %vecinit2 = insertelement <8 x float> %vecinit1, float %add2, i32 1 + %vecext4 = extractelement <8 x float> %a, i32 4 + %vecext5 = extractelement <8 x float> %a, i32 5 + %add3 = fadd float %vecext4, %vecext5 + %vecinit3 = insertelement <8 x float> %vecinit2, float %add3, i32 2 + %vecext6 = extractelement <8 x float> %a, i32 6 + %vecext7 = extractelement <8 x float> %a, i32 7 + %add4 = fadd float %vecext6, %vecext7 + %vecinit4 = insertelement <8 x float> %vecinit3, float %add4, i32 3 + ret <8 x float> %vecinit4 +} +; CHECK-LABEL: test13_undef +; SSE: haddps +; SSE-NOT: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add = add i32 %vecext, %vecext1 + %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 + %vecext2 = extractelement <8 x i32> %b, i32 2 + %vecext3 = extractelement <8 x i32> %b, i32 3 + %add4 = add i32 %vecext2, %vecext3 + %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 3 + ret <8 x i32> %vecinit5 +} +; CHECK-LABEL: test14_undef +; SSE: phaddd +; AVX: vphaddd +; AVX2: vphaddd +; CHECK-NOT: phaddd +; CHECK: ret + +; On AVX2, the 
following sequence can be folded into a single horizontal add. +; If the Subtarget doesn't support AVX2, then we avoid emitting two packed +; integer horizontal adds instead of two scalar adds followed by vector inserts. +define <8 x i32> @test15_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add = add i32 %vecext, %vecext1 + %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 + %vecext2 = extractelement <8 x i32> %b, i32 4 + %vecext3 = extractelement <8 x i32> %b, i32 5 + %add4 = add i32 %vecext2, %vecext3 + %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 6 + ret <8 x i32> %vecinit5 +} +; CHECK-LABEL: test15_undef +; SSE-NOT: phaddd +; AVX-NOT: vphaddd +; AVX2: vphaddd +; CHECK: ret + +define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add = add i32 %vecext, %vecext1 + %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 + %vecext2 = extractelement <8 x i32> %a, i32 2 + %vecext3 = extractelement <8 x i32> %a, i32 3 + %add4 = add i32 %vecext2, %vecext3 + %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1 + ret <8 x i32> %vecinit5 +} +; CHECK-LABEL: test16_undef +; SSE: phaddd +; AVX: vphaddd +; AVX2: vphaddd +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x i32> @test17_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add1 = add i32 %vecext, %vecext1 + %vecinit1 = insertelement <8 x i32> undef, i32 %add1, i32 0 + %vecext2 = extractelement <8 x i32> %a, i32 2 + %vecext3 = extractelement <8 x i32> %a, i32 3 + %add2 = add i32 %vecext2, %vecext3 + %vecinit2 = insertelement <8 x i32> %vecinit1, i32 %add2, i32 1 + %vecext4 = extractelement <8 x i32> %a, i32 4 + %vecext5 = extractelement <8 x i32> %a, i32 5 + %add3 = add i32 %vecext4, %vecext5 + %vecinit3 = 
insertelement <8 x i32> %vecinit2, i32 %add3, i32 2 + %vecext6 = extractelement <8 x i32> %a, i32 6 + %vecext7 = extractelement <8 x i32> %a, i32 7 + %add4 = add i32 %vecext6, %vecext7 + %vecinit4 = insertelement <8 x i32> %vecinit3, i32 %add4, i32 3 + ret <8 x i32> %vecinit4 +} +; CHECK-LABEL: test17_undef +; SSE: phaddd +; AVX: vphaddd +; AVX2: vphaddd +; CHECK-NOT: haddps +; CHECK: ret + From 85386a3de9ebcbf688c0cb864fc26c62fa5220f3 Mon Sep 17 00:00:00 2001 From: Dinesh Dwivedi Date: Thu, 19 Jun 2014 10:36:52 +0000 Subject: [PATCH 0029/1155] Added instruction combine to transform few more negative values addition to subtraction (Part 1) This patch enables transforms for following patterns. (x + (~(y & c) + 1) --> x - (y & c) (x + (~((y >> z) & c) + 1) --> x - ((y>>z) & c) Differential Revision: http://reviews.llvm.org/D3733 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211266 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAddSub.cpp | 45 +++++++++++++ test/Transforms/InstCombine/add2.ll | 63 +++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 77b2404b49d4..5b28d8ccb5b2 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -954,6 +954,48 @@ bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS) { return true; return false; + } + +// Checks if any operand is negative and we can convert add to sub. 
+// This function checks for following negative patterns +// ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C)) +// TODO: ADD(XOR(AND(Z, ~C), ~C), 1) == NEG(OR(Z, C)) if C is even +// TODO: XOR(AND(Z, ~C), (~C + 1)) == NEG(OR(Z, C)) if C is odd +Value *checkForNegativeOperand(BinaryOperator &I, + InstCombiner::BuilderTy *Builder) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + // This function creates 2 instructions to replace ADD, we need at least one + // of LHS or RHS to have one use to ensure benefit in transform. + if (!LHS->hasOneUse() && !RHS->hasOneUse()) + return nullptr; + + bool IHasNSW = I.hasNoSignedWrap(); + bool IHasNUW = I.hasNoUnsignedWrap(); + + Value *X = nullptr, *Y = nullptr, *Z = nullptr; + const APInt *C1 = nullptr, *C2 = nullptr; + + // if ONE is on other side, swap + if (match(RHS, m_Add(m_Value(X), m_One()))) + std::swap(LHS, RHS); + + if (match(LHS, m_Add(m_Value(X), m_One()))) { + // if XOR on other side, swap + if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) + std::swap(X, RHS); + + // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) + // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) + if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) { + if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) { + Value *NewAnd = Builder->CreateAnd(Z, *C1); + return Builder->CreateSub(RHS, NewAnd, "", IHasNUW, IHasNSW); + } + } + } + + return nullptr; } Instruction *InstCombiner::visitAdd(BinaryOperator &I) { @@ -1065,6 +1107,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = dyn_castNegVal(RHS)) return BinaryOperator::CreateSub(LHS, V); + if (Value *V = checkForNegativeOperand(I, Builder)) + return ReplaceInstUsesWith(I, V); + // A+B --> A|B iff A and B have no bits set in common. 
if (IntegerType *IT = dyn_cast(I.getType())) { APInt LHSKnownOne(IT->getBitWidth(), 0); diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll index c728b4437af4..0328c4ff6e3f 100644 --- a/test/Transforms/InstCombine/add2.ll +++ b/test/Transforms/InstCombine/add2.ll @@ -87,6 +87,69 @@ define i16 @test9(i16 %a) { ; CHECK-NEXT: ret i16 %d } +define i32 @test10(i32 %x) { + %x.not = or i32 %x, -1431655766 + %neg = xor i32 %x.not, 1431655765 + %add = add i32 %x, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -1431655766 +; CHECK-NEXT: ret i32 [[AND]] +} + +define i32 @test11(i32 %x, i32 %y) { + %x.not = or i32 %x, -1431655766 + %neg = xor i32 %x.not, 1431655765 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655765 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i32 @test12(i32 %x, i32 %y) { + %shr = ashr i32 %x, 3 + %shr.not = or i32 %shr, -1431655766 + %neg = xor i32 %shr.not, 1431655765 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[SHR:%[a-z0-9]+]] = ashr i32 %x, 3 +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHR]], 1431655765 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i32 @test13(i32 %x, i32 %y) { + %x.not = or i32 %x, -1431655767 + %neg = xor i32 %x.not, 1431655766 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test13( +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655766 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i32 @test14(i32 %x, i32 %y) { + %shr = ashr i32 %x, 3 + %shr.not = or i32 %shr, -1431655767 + %neg = xor i32 %shr.not, 1431655766 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret 
i32 %add1 +; CHECK-LABEL: @test14( +; CHECK-NEXT: [[SHR:%[a-z0-9]+]] = ashr i32 %x, 3 +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHR]], 1431655766 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + define i16 @add_nsw_mul_nsw(i16 %x) { %add1 = add nsw i16 %x, %x %add2 = add nsw i16 %add1, %x From dc9bdcc2cbd027e38d0abfdf36ff3fa59453d8be Mon Sep 17 00:00:00 2001 From: Dinesh Dwivedi Date: Thu, 19 Jun 2014 14:11:53 +0000 Subject: [PATCH 0030/1155] Updated comments as suggested by Rafael. Thanks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211268 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/JumpThreading.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index e501ff29d038..6e50d3331df6 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -158,7 +158,13 @@ bool JumpThreading::runOnFunction(Function &F) { TLI = &getAnalysis(); LVI = &getAnalysis(); - // Remove unreachable blocks from function as they may result in infinite loop. + // Remove unreachable blocks from function as they may result in infinite + // loop. We do threading if we found something profitable. Jump threading a + // branch can create other opportunities. If these opportunities form a cycle + // i.e. if any jump treading is undoing previous threading in the path, then + // we will loop forever. We take care of this issue by not jump threading for + // back edges. This works for normal cases but not for unreachable blocks as + // they may have cycle with no back edge. removeUnreachableBlocks(F); FindLoopHeaders(F); From 27107726551771a102be3043fa9e7aeffa2f8023 Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Thu, 19 Jun 2014 14:39:14 +0000 Subject: [PATCH 0031/1155] [mips] Small update to the logic behind the expansion of assembly pseudo instructions. 
Summary: The functions that do the expansion now return false on success and true otherwise. This is so we can catch some errors during the expansion (e.g.: immediate too large). The next patch adds some test cases. Reviewers: vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D4214 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211269 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 29 ++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index dd2b857789ca..8f46820ff7e9 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -116,14 +116,20 @@ class MipsAsmParser : public MCTargetAsmParser { bool needsExpansion(MCInst &Inst); - void expandInstruction(MCInst &Inst, SMLoc IDLoc, + // Expands assembly pseudo instructions. + // Returns false on success, true otherwise. 
+ bool expandInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - void expandLoadImm(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - void expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); + void expandMemInst(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions, bool isLoad, bool isImmOpnd); @@ -965,7 +971,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, } // if load/store if (needsExpansion(Inst)) - expandInstruction(Inst, IDLoc, Instructions); + return expandInstruction(Inst, IDLoc, Instructions); else Instructions.push_back(Inst); @@ -984,9 +990,11 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) { } } -void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, +bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { switch (Inst.getOpcode()) { + default: assert(0 && "unimplemented expansion"); + return true; case Mips::LoadImm32Reg: return expandLoadImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Imm: @@ -996,7 +1004,7 @@ void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, } } -void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, +bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(1); @@ -1038,9 +1046,10 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, tmpInst.setLoc(IDLoc); Instructions.push_back(tmpInst); } + return false; } -void +bool MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; @@ -1081,9 +1090,10 @@ MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, 
tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); Instructions.push_back(tmpInst); } + return false; } -void +bool MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; @@ -1115,6 +1125,7 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); Instructions.push_back(tmpInst); } + return false; } void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, From 7e4098332804df5126060a2c8f846445a82c5a62 Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Thu, 19 Jun 2014 15:08:04 +0000 Subject: [PATCH 0032/1155] [mips] Implementation of dli. Patch by David Chisnall His work was sponsored by: DARPA, AFRL Some small modifications to the original patch: we now error if it's not possible to expand an instruction (mips-expansions-bad.s has some examples). Added some comments to the expansions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211271 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 97 ++++++++- lib/Target/Mips/Mips64InstrInfo.td | 7 + test/MC/Mips/mips-expansions-bad.s | 6 + test/MC/Mips/mips64-expansions.s | 209 ++++++++++++++++++++ 4 files changed, 312 insertions(+), 7 deletions(-) create mode 100644 test/MC/Mips/mips-expansions-bad.s create mode 100644 test/MC/Mips/mips64-expansions.s diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 8f46820ff7e9..dc9ed2feeb9c 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -984,6 +984,7 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) { case Mips::LoadImm32Reg: case Mips::LoadAddr32Imm: case Mips::LoadAddr32Reg: + case Mips::LoadImm64Reg: return true; default: return false; @@ -997,6 +998,12 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, return true; case Mips::LoadImm32Reg: return 
expandLoadImm(Inst, IDLoc, Instructions); + case Mips::LoadImm64Reg: + if (!isGP64()) { + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + return expandLoadImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Imm: return expandLoadAddressImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Reg: @@ -1004,6 +1011,30 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, } } +namespace { +template +void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + MCInst tmpInst; + if (PerformShift) { + tmpInst.setOpcode(Mips::DSLL); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateImm(16)); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); + tmpInst.clear(); + } + tmpInst.setOpcode(Mips::ORi); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand( + MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift))); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} +} + bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; @@ -1012,8 +1043,10 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, const MCOperand &RegOp = Inst.getOperand(0); assert(RegOp.isReg() && "expected register operand kind"); - int ImmValue = ImmOp.getImm(); + int64_t ImmValue = ImmOp.getImm(); tmpInst.setLoc(IDLoc); + // FIXME: gas has a special case for values that are 000...1111, which + // becomes a li -1 and then a dsrl if (0 <= ImmValue && ImmValue <= 65535) { // For 0 <= j <= 65535. 
// li d,j => ori d,$zero,j @@ -1030,21 +1063,71 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); - } else { - // For any other value of j that is representable as a 32-bit integer. + } else if ((ImmValue & 0xffffffff) == ImmValue) { + // For any value of j that is representable as a 32-bit integer, create + // a sequence of: // li d,j => lui d,hi16(j) // ori d,d,lo16(j) tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); - tmpInst.clear(); - tmpInst.setOpcode(Mips::ORi); + createShiftOr<0, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + } else if ((ImmValue & (0xffffLL << 48)) == 0) { + if (!isGP64()) { + Error (IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // <------- lo32 ------> + // <------- hi32 ------> + // <- hi16 -> <- lo16 -> + // _________________________________ + // | | | | + // | 16-bytes | 16-bytes | 16-bytes | + // |__________|__________|__________| + // + // For any value of j that is representable as a 48-bit integer, create + // a sequence of: + // li d,j => lui d,hi16(j) + // ori d,d,hi16(lo32(j)) + // dsll d,d,16 + // ori d,d,lo16(lo32(j)) + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand( + MCOperand::CreateImm((ImmValue & (0xffffLL << 32)) >> 32)); + Instructions.push_back(tmpInst); + createShiftOr<16, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + } else { + if (!isGP64()) { + Error (IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // <------- hi32 ------> <------- lo32 ------> + // <- hi16 -> <- lo16 -> + // 
___________________________________________ + // | | | | | + // | 16-bytes | 16-bytes | 16-bytes | 16-bytes | + // |__________|__________|__________|__________| + // + // For any value of j that isn't representable as a 48-bit integer. + // li d,j => lui d,hi16(j) + // ori d,d,lo16(hi32(j)) + // dsll d,d,16 + // ori d,d,hi16(lo32(j)) + // dsll d,d,16 + // ori d,d,lo16(lo32(j)) + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); - tmpInst.setLoc(IDLoc); + tmpInst.addOperand( + MCOperand::CreateImm((ImmValue & (0xffffLL << 48)) >> 48)); Instructions.push_back(tmpInst); + createShiftOr<32, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<16, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); } return false; } diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 88422ce19dea..2b9cda7ac83c 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -23,6 +23,8 @@ def uimm16_64 : Operand { // Signed Operand def simm10_64 : Operand; +def imm64: Operand; + // Transformation Function - get Imm - 32. 
def Subtract32 : SDNodeXFormgetZExtValue() - 32); @@ -486,6 +488,11 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs", (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; +class LoadImm64< string instr_asm, Operand Od, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64), + !strconcat(instr_asm, "\t$rt, $imm64")> ; +def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>; + /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64", Predicates = [HasMips64] in { def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>; diff --git a/test/MC/Mips/mips-expansions-bad.s b/test/MC/Mips/mips-expansions-bad.s new file mode 100644 index 000000000000..a137deb8d172 --- /dev/null +++ b/test/MC/Mips/mips-expansions-bad.s @@ -0,0 +1,6 @@ +# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>%t1 +# RUN: FileCheck %s < %t1 + + .text + li $5, 0x100000000 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled + dli $5, 1 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips64-expansions.s b/test/MC/Mips/mips64-expansions.s new file mode 100644 index 000000000000..0efdd2fa5c81 --- /dev/null +++ b/test/MC/Mips/mips64-expansions.s @@ -0,0 +1,209 @@ +# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s +# +# The GNU assembler implements 'dli' and 'dla' variants on 'li' and 'la' +# supporting double-word lengths. Test that not only are they present, bu +# that they also seem to handle 64-bit values. +# +# XXXRW: Does using powers of ten make me a bad person? 
+# +# CHECK: ori $12, $zero, 1 # encoding: [0x01,0x00,0x0c,0x34] +# CHECK: ori $12, $zero, 10 # encoding: [0x0a,0x00,0x0c,0x34] +# CHECK: ori $12, $zero, 100 # encoding: [0x64,0x00,0x0c,0x34] +# CHECK: ori $12, $zero, 1000 # encoding: [0xe8,0x03,0x0c,0x34] +# CHECK: ori $12, $zero, 10000 # encoding: [0x10,0x27,0x0c,0x34] +# CHECK: lui $12, 1 # encoding: [0x01,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 34464 # encoding: [0xa0,0x86,0x8c,0x35] +# CHECK: lui $12, 15 # encoding: [0x0f,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 16960 # encoding: [0x40,0x42,0x8c,0x35] +# CHECK: lui $12, 152 # encoding: [0x98,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 38528 # encoding: [0x80,0x96,0x8c,0x35] +# CHECK: lui $12, 1525 # encoding: [0xf5,0x05,0x0c,0x3c] +# CHECK: ori $12, $12, 57600 # encoding: [0x00,0xe1,0x8c,0x35] +# CHECK: lui $12, 15258 # encoding: [0x9a,0x3b,0x0c,0x3c] +# CHECK: ori $12, $12, 51712 # encoding: [0x00,0xca,0x8c,0x35] +# CHECK: lui $12, 2 # encoding: [0x02,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 21515 # encoding: [0x0b,0x54,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 58368 # encoding: [0x00,0xe4,0x8c,0x35] +# CHECK: lui $12, 23 # encoding: [0x17,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 18550 # encoding: [0x76,0x48,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 59392 # encoding: [0x00,0xe8,0x8c,0x35] +# CHECK: lui $12, 232 # encoding: [0xe8,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 54437 # encoding: [0xa5,0xd4,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 4096 # encoding: [0x00,0x10,0x8c,0x35] +# CHECK: lui $12, 2328 # encoding: [0x18,0x09,0x0c,0x3c] +# CHECK: ori $12, $12, 20082 # encoding: [0x72,0x4e,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 40960 # encoding: [0x00,0xa0,0x8c,0x35] +# CHECK: lui $12, 23283 # encoding: [0xf3,0x5a,0x0c,0x3c] +# CHECK: ori $12, $12, 
4218 # encoding: [0x7a,0x10,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 16384 # encoding: [0x00,0x40,0x8c,0x35] +# CHECK: lui $12, 3 # encoding: [0x03,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 36222 # encoding: [0x7e,0x8d,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 42182 # encoding: [0xc6,0xa4,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 32768 # encoding: [0x00,0x80,0x8c,0x35] +# CHECK: lui $12, 35 # encoding: [0x23,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 34546 # encoding: [0xf2,0x86,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 28609 # encoding: [0xc1,0x6f,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 355 # encoding: [0x63,0x01,0x0c,0x3c] +# CHECK: ori $12, $12, 17784 # encoding: [0x78,0x45,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 23946 # encoding: [0x8a,0x5d,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 3552 # encoding: [0xe0,0x0d,0x0c,0x3c] +# CHECK: ori $12, $12, 46771 # encoding: [0xb3,0xb6,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 42852 # encoding: [0x64,0xa7,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 35527 # encoding: [0xc7,0x8a,0x0c,0x3c] +# CHECK: ori $12, $12, 8964 # encoding: [0x04,0x23,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 35304 # encoding: [0xe8,0x89,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: 
[0x00,0x00,0x8c,0x35] +# CHECK: addiu $12, $zero, -1 # encoding: [0xff,0xff,0x0c,0x24] +# CHECK: addiu $12, $zero, -10 # encoding: [0xf6,0xff,0x0c,0x24] +# CHECK: addiu $12, $zero, -100 # encoding: [0x9c,0xff,0x0c,0x24] +# CHECK: addiu $12, $zero, -1000 # encoding: [0x18,0xfc,0x0c,0x24] +# CHECK: addiu $12, $zero, -10000 # encoding: [0xf0,0xd8,0x0c,0x24] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 65534 # encoding: [0xfe,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 31072 # encoding: [0x60,0x79,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 65520 # encoding: [0xf0,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 48576 # encoding: [0xc0,0xbd,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 65383 # encoding: [0x67,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 27008 # encoding: [0x80,0x69,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 64010 # encoding: [0x0a,0xfa,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 7936 # encoding: [0x00,0x1f,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, 
$12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 50277 # encoding: [0x65,0xc4,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 13824 # encoding: [0x00,0x36,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65533 # encoding: [0xfd,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 44020 # encoding: [0xf4,0xab,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 7168 # encoding: [0x00,0x1c,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65512 # encoding: [0xe8,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 46985 # encoding: [0x89,0xb7,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 6144 # encoding: [0x00,0x18,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65303 # encoding: [0x17,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 11098 # encoding: [0x5a,0x2b,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 61440 # encoding: [0x00,0xf0,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 63207 # encoding: [0xe7,0xf6,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 45453 # encoding: [0x8d,0xb1,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 24576 # encoding: [0x00,0x60,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 42252 # encoding: [0x0c,0xa5,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 61317 # encoding: [0x85,0xef,0x8c,0x35] +# CHECK: dsll $12, 
$12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 49152 # encoding: [0x00,0xc0,0x8c,0x35] +# CHECK: lui $12, 65532 # encoding: [0xfc,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 29313 # encoding: [0x81,0x72,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 23353 # encoding: [0x39,0x5b,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 32768 # encoding: [0x00,0x80,0x8c,0x35] +# CHECK: lui $12, 65500 # encoding: [0xdc,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 30989 # encoding: [0x0d,0x79,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 36927 # encoding: [0x3f,0x90,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 65180 # encoding: [0x9c,0xfe,0x0c,0x3c] +# CHECK: ori $12, $12, 47751 # encoding: [0x87,0xba,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 41590 # encoding: [0x76,0xa2,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 61983 # encoding: [0x1f,0xf2,0x0c,0x3c] +# CHECK: ori $12, $12, 18764 # encoding: [0x4c,0x49,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 22684 # encoding: [0x9c,0x58,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 30008 # encoding: [0x38,0x75,0x0c,0x3c] +# CHECK: ori $12, $12, 56571 # encoding: [0xfb,0xdc,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 30232 # encoding: [0x18,0x76,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] + + dli $t0, 1 + dli $t0, 10 + dli 
$t0, 100 + dli $t0, 1000 + dli $t0, 10000 + dli $t0, 100000 + dli $t0, 1000000 + dli $t0, 10000000 + dli $t0, 100000000 + dli $t0, 1000000000 + dli $t0, 10000000000 + dli $t0, 100000000000 + dli $t0, 1000000000000 + dli $t0, 10000000000000 + dli $t0, 100000000000000 + dli $t0, 1000000000000000 + dli $t0, 10000000000000000 + dli $t0, 100000000000000000 + dli $t0, 1000000000000000000 + dli $t0, 10000000000000000000 + dli $t0, -1 + dli $t0, -10 + dli $t0, -100 + dli $t0, -1000 + dli $t0, -10000 + dli $t0, -100000 + dli $t0, -1000000 + dli $t0, -10000000 + dli $t0, -100000000 + dli $t0, -1000000000 + dli $t0, -10000000000 + dli $t0, -100000000000 + dli $t0, -1000000000000 + dli $t0, -10000000000000 + dli $t0, -100000000000000 + dli $t0, -1000000000000000 + dli $t0, -10000000000000000 + dli $t0, -100000000000000000 + dli $t0, -1000000000000000000 + dli $t0, -10000000000000000000 From bb804ee909a0e81d39920bdc8272e624fb8da443 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 19 Jun 2014 15:39:33 +0000 Subject: [PATCH 0033/1155] Emit DWARF3 call frame information when DWARF3+ debug info is requested Currently, llvm always emits a DWARF CIE with a version of 1, even when emitting DWARF 3 or 4, which both support CIE version 3. This patch makes it emit the newer CIE version when we are emitting DWARF 3 or 4. This will not reduce compatibility, as we already emit other DWARF3/4 features, and is worth doing as the DWARF3 spec removed some ambiguities in the interpretation of call frame information. It also fixes a minor bug where the "return address" field of the CIE was encoded as a ULEB128, which is only valid when the CIE version is 3. There are no test changes for this, because (as far as I can tell) none of the platforms that we test have a return address register with a DWARF register number >127. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211272 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Dwarf.h | 1 - lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 + lib/MC/MCDwarf.cpp | 14 +++- test/DebugInfo/AArch64/eh_frame.s | 4 +- .../DebugInfo/AArch64/eh_frame_personality.ll | 4 +- test/DebugInfo/SystemZ/eh_frame.s | 4 +- test/DebugInfo/SystemZ/eh_frame_personality.s | 4 +- test/MC/ELF/cfi-adjust-cfa-offset.s | 2 +- test/MC/ELF/cfi-advance-loc2.s | 2 +- test/MC/ELF/cfi-def-cfa-offset.s | 2 +- test/MC/ELF/cfi-def-cfa-register.s | 2 +- test/MC/ELF/cfi-def-cfa.s | 2 +- test/MC/ELF/cfi-escape.s | 2 +- test/MC/ELF/cfi-offset.s | 2 +- test/MC/ELF/cfi-register.s | 2 +- test/MC/ELF/cfi-rel-offset.s | 2 +- test/MC/ELF/cfi-rel-offset2.s | 2 +- test/MC/ELF/cfi-remember.s | 2 +- test/MC/ELF/cfi-restore.s | 2 +- test/MC/ELF/cfi-same-value.s | 2 +- test/MC/ELF/cfi-sections.s | 4 +- test/MC/ELF/cfi-signal-frame.s | 4 +- test/MC/ELF/cfi-undefined.s | 2 +- test/MC/ELF/cfi-version.ll | 47 +++++++++++++ test/MC/ELF/cfi-window-save.s | 2 +- test/MC/ELF/cfi-zero-addr-delta.s | 2 +- test/MC/ELF/cfi.s | 70 +++++++++---------- test/MC/Mips/eh-frame.s | 8 +-- test/MC/PowerPC/ppc64-initial-cfa.s | 8 +-- 29 files changed, 132 insertions(+), 74 deletions(-) create mode 100644 test/MC/ELF/cfi-version.ll diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h index ca316441ea7e..cd9f75600cbc 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/Support/Dwarf.h @@ -57,7 +57,6 @@ enum LLVMConstants : uint32_t { DW_TAG_user_base = 0x1000, // Recommended base for user tags. DWARF_VERSION = 4, // Default dwarf version we output. - DW_CIE_VERSION = 1, // Common frame information version. DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes. DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames. DW_ARANGES_VERSION = 2 // Section version number for .debug_aranges. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7aaf731b6f8b..3847eb124f3d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -209,6 +209,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); + Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion); + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index be6731abedd9..1016466839f6 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -1270,7 +1270,10 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, // Version if (verboseAsm) streamer.AddComment("DW_CIE_VERSION"); - streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1); + // For DWARF2, we use CIE version 1 + // For DWARF3+, we use CIE version 3 + uint8_t CIEVersion = context.getDwarfVersion() <= 2 ? 
1 : 3; + streamer.EmitIntValue(CIEVersion, 1); // Augmentation String SmallString<8> Augmentation; @@ -1298,7 +1301,14 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, // Return Address Register if (verboseAsm) streamer.AddComment("CIE Return Address Column"); - streamer.EmitULEB128IntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true)); + if (CIEVersion == 1) { + assert(MRI->getRARegister() <= 255 && + "DWARF 2 encodes return_address_register in one byte"); + streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true), 1); + } else { + streamer.EmitULEB128IntValue( + MRI->getDwarfRegNum(MRI->getRARegister(), true)); + } // Augmentation Data Length (optional) diff --git a/test/DebugInfo/AArch64/eh_frame.s b/test/DebugInfo/AArch64/eh_frame.s index d8d6b6d9325f..12a58961d717 100644 --- a/test/DebugInfo/AArch64/eh_frame.s +++ b/test/DebugInfo/AArch64/eh_frame.s @@ -17,7 +17,7 @@ foo: // Output is: // CHECK: Contents of section .eh_frame: -// CHECK-NEXT: 0000 10000000 00000000 017a5200 017c1e01 .........zR..|.. +// CHECK-NEXT: 0000 10000000 00000000 037a5200 017c1e01 .........zR..|.. // CHECK-NEXT: 0010 1b0c1f00 10000000 18000000 00000000 ................ 
@@ -30,7 +30,7 @@ foo: // ------------------- // 10000000: length of first CIE = 0x10 // 00000000: This is a CIE -// 01: version = 0x1 +// 03: version = 0x3 // 7a 52 00: augmentation string "zR" -- pointer format is specified // 01: code alignment factor 1 // 7c: data alignment factor -4 diff --git a/test/DebugInfo/AArch64/eh_frame_personality.ll b/test/DebugInfo/AArch64/eh_frame_personality.ll index d35f2a2fcafb..51d6bf80b950 100644 --- a/test/DebugInfo/AArch64/eh_frame_personality.ll +++ b/test/DebugInfo/AArch64/eh_frame_personality.ll @@ -16,7 +16,7 @@ clean: } ; CHECK: Contents of section .eh_frame: -; CHECK: 0000 1c000000 00000000 017a504c 5200017c .........zPLR..| +; CHECK: 0000 1c000000 00000000 037a504c 5200017c .........zPLR..| ; CHECK: 0010 1e0b0000 00000000 00000000 1b0c1f00 ................ ; Don't really care about the rest: @@ -33,7 +33,7 @@ clean: ; ---------- ; 1c000000: Length = 0x1c ; 00000000: This is a CIE -; 01: Version 1 +; 03: Version 3 ; 7a 50 4c 52 00: Augmentation string "zPLR" (personality routine, language-specific data, pointer format) ; 01: Code alignment factor 1 ; 78: Data alignment factor: -8 diff --git a/test/DebugInfo/SystemZ/eh_frame.s b/test/DebugInfo/SystemZ/eh_frame.s index 4e7afd56e94b..d55b6cdea8c3 100644 --- a/test/DebugInfo/SystemZ/eh_frame.s +++ b/test/DebugInfo/SystemZ/eh_frame.s @@ -23,7 +23,7 @@ check_largest_class: # Contents of the .eh_frame section: # # 00000000 0000001c 00000000 CIE -# Version: 1 +# Version: 3 # Augmentation: "zR" # Code alignment factor: 1 # Data alignment factor: -8 @@ -48,7 +48,7 @@ check_largest_class: # DW_CFA_nop # # CHECK: Contents of section .eh_frame: -# CHECK-NEXT: 0000 00000014 00000000 017a5200 01780e01 .........zR..x.. +# CHECK-NEXT: 0000 00000014 00000000 037a5200 01780e01 .........zR..x.. # CHECK-NEXT: 0010 1b0c0fa0 01000000 0000001c 0000001c ................ # CHECK-NEXT: 0020 00000000 00000012 00468d07 8e068f05 .........F...... # CHECK-NEXT: 0030 440ec002 00000000 D....... 
diff --git a/test/DebugInfo/SystemZ/eh_frame_personality.s b/test/DebugInfo/SystemZ/eh_frame_personality.s index 46b46db1d806..456e0a6e6bdd 100644 --- a/test/DebugInfo/SystemZ/eh_frame_personality.s +++ b/test/DebugInfo/SystemZ/eh_frame_personality.s @@ -37,7 +37,7 @@ DW.ref.__gxx_personality_v0: # Contents of the .eh_frame section: # # 00000000 0000001c 00000000 CIE -# Version: 1 +# Version: 3 # Augmentation: "zPLR" # Code alignment factor: 1 # Data alignment factor: -8 @@ -61,7 +61,7 @@ DW.ref.__gxx_personality_v0: # DW_CFA_nop # # CHECK: Contents of section .eh_frame: -# CHECK-NEXT: 0000 0000001c 00000000 017a504c 52000178 .........zPLR..x +# CHECK-NEXT: 0000 0000001c 00000000 037a504c 52000178 .........zPLR..x # CHECK-NEXT: 0010 0e079b00 0000001b 1b0c0fa0 01000000 ................ # CHECK-NEXT: 0020 0000001c 00000024 00000000 00000012 .......$........ # CHECK-NEXT: 0030 04000000 00468e06 8f05440e c0020000 .....F....D..... diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s index b3768cb9834c..9d639f70d8dd 100644 --- a/test/MC/ELF/cfi-adjust-cfa-offset.s +++ b/test/MC/ELF/cfi-adjust-cfa-offset.s @@ -28,7 +28,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 1C000000 1C000000 // CHECK-NEXT: 0020: 00000000 0A000000 00440E10 410E1444 // CHECK-NEXT: 0030: 0E080000 00000000 diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s index d7a53c462b70..98caa0185f59 100644 --- a/test/MC/ELF/cfi-advance-loc2.s +++ b/test/MC/ELF/cfi-advance-loc2.s @@ -26,7 +26,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 
00000000 01010000 00030001 0E080000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s index eac2c731fa93..59f740055d47 100644 --- a/test/MC/ELF/cfi-def-cfa-offset.s +++ b/test/MC/ELF/cfi-def-cfa-offset.s @@ -27,7 +27,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 0A000000 00440E10 450E0800 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s index 00d8b99af9d6..178ba32882dc 100644 --- a/test/MC/ELF/cfi-def-cfa-register.s +++ b/test/MC/ELF/cfi-def-cfa-register.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410D06 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s index 36e147f5a4da..dfb0d4b59396 100644 --- a/test/MC/ELF/cfi-def-cfa.s +++ b/test/MC/ELF/cfi-def-cfa.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410C07 08000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s index 839d6717debc..5394ee414aa7 100644 --- a/test/MC/ELF/cfi-escape.s +++ b/test/MC/ELF/cfi-escape.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 
0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00411507 7F000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s index 951a6001e519..a65b4fc783c7 100644 --- a/test/MC/ELF/cfi-offset.s +++ b/test/MC/ELF/cfi-offset.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00418602 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s index 4abbb53b8fc9..94417702c13c 100644 --- a/test/MC/ELF/cfi-register.s +++ b/test/MC/ELF/cfi-register.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410906 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s index 34254c862a46..0dc69c89cf4c 100644 --- a/test/MC/ELF/cfi-rel-offset.s +++ b/test/MC/ELF/cfi-rel-offset.s @@ -31,7 +31,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 24000000 1C000000 // CHECK-NEXT: 0020: 00000000 05000000 00410E08 410D0641 // CHECK-NEXT: 0030: 11067F41 0E104186 02000000 00000000 diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s index 3de769f39fa0..360e7b0ea0f5 100644 --- a/test/MC/ELF/cfi-rel-offset2.s +++ b/test/MC/ELF/cfi-rel-offset2.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: 
Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 01000000 00411106 7F000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s index 98c759d4fffc..3a38948b6a3e 100644 --- a/test/MC/ELF/cfi-remember.s +++ b/test/MC/ELF/cfi-remember.s @@ -26,7 +26,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 03000000 00410A41 0B000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s index d25b5ff2e93f..e225797f54d6 100644 --- a/test/MC/ELF/cfi-restore.s +++ b/test/MC/ELF/cfi-restore.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 0041C600 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s index 9f5ae4be9ed4..2d37f4d0b43e 100644 --- a/test/MC/ELF/cfi-same-value.s +++ b/test/MC/ELF/cfi-same-value.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410806 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-sections.s b/test/MC/ELF/cfi-sections.s index 15a79e5c055e..b0ba543e5bdb 100644 --- 
a/test/MC/ELF/cfi-sections.s +++ b/test/MC/ELF/cfi-sections.s @@ -26,7 +26,7 @@ f2: // ELF_64-NEXT: AddressAlignment: 8 // ELF_64-NEXT: EntrySize: 0 // ELF_64-NEXT: SectionData ( -// ELF_64-NEXT: 0000: 14000000 FFFFFFFF 01000178 100C0708 +// ELF_64-NEXT: 0000: 14000000 FFFFFFFF 03000178 100C0708 // ELF_64-NEXT: 0010: 90010000 00000000 14000000 00000000 // ELF_64-NEXT: 0020: 00000000 00000000 01000000 00000000 // ELF_64-NEXT: 0030: 14000000 00000000 00000000 00000000 @@ -47,7 +47,7 @@ f2: // ELF_32-NEXT: AddressAlignment: 4 // ELF_32-NEXT: EntrySize: 0 // ELF_32-NEXT: SectionData ( -// ELF_32-NEXT: 0000: 10000000 FFFFFFFF 0100017C 080C0404 +// ELF_32-NEXT: 0000: 10000000 FFFFFFFF 0300017C 080C0404 // ELF_32-NEXT: 0010: 88010000 0C000000 00000000 00000000 // ELF_32-NEXT: 0020: 01000000 0C000000 00000000 01000000 // ELF_32-NEXT: 0030: 01000000 diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s index 023311962189..98deb0a1de5c 100644 --- a/test/MC/ELF/cfi-signal-frame.s +++ b/test/MC/ELF/cfi-signal-frame.s @@ -23,10 +23,10 @@ g: // CHECK-NEXT: AddressAlignment: 8 // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5253 00017810 +// CHECK-NEXT: 0000: 14000000 00000000 037A5253 00017810 // CHECK-NEXT: 0010: 011B0C07 08900100 10000000 1C000000 // CHECK-NEXT: 0020: 00000000 00000000 00000000 14000000 -// CHECK-NEXT: 0030: 00000000 017A5200 01781001 1B0C0708 +// CHECK-NEXT: 0030: 00000000 037A5200 01781001 1B0C0708 // CHECK-NEXT: 0040: 90010000 10000000 1C000000 00000000 // CHECK-NEXT: 0050: 00000000 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s index 9773a36a3b03..568b3159cc44 100644 --- a/test/MC/ELF/cfi-undefined.s +++ b/test/MC/ELF/cfi-undefined.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 
01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410706 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll new file mode 100644 index 000000000000..a06638c04e0e --- /dev/null +++ b/test/MC/ELF/cfi-version.ll @@ -0,0 +1,47 @@ +; RUN: llc %s -o - -dwarf-version 2 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF2 +; RUN: llc %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 +; RUN: llc %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv8-arm-none-eabi" + +; Function Attrs: nounwind +define i32 @foo() #0 { +entry: + %call = call i32 bitcast (i32 (...)* @bar to i32 ()*)(), !dbg !12 + %add = add nsw i32 %call, 1, !dbg !12 + ret i32 %add, !dbg !12 +} + +declare i32 @bar(...) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test.c", metadata !"/tmp"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata 
!"foo", metadata !"foo", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @foo, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/test.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!11 = metadata !{metadata !"clang version 3.5.0 "} +!12 = metadata !{i32 2, i32 0, metadata !4, null} + +; DWARF2: .debug_frame contents: +; DWARF2: 00000000 0000000c ffffffff CIE +; DWARF2-NEXT: Version: 1 +; DWARF2-NEXT: Augmentation: + +; DWARF34: .debug_frame contents: +; DWARF34: 00000000 0000000c ffffffff CIE +; DWARF34-NEXT: Version: 3 +; DWARF34-NEXT: Augmentation: diff --git a/test/MC/ELF/cfi-window-save.s b/test/MC/ELF/cfi-window-save.s index c7d438a19260..b083901c137a 100644 --- a/test/MC/ELF/cfi-window-save.s +++ b/test/MC/ELF/cfi-window-save.s @@ -26,7 +26,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00412D00 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s index 05cb0ae35bd2..8662839b5274 100644 --- a/test/MC/ELF/cfi-zero-addr-delta.s +++ b/test/MC/ELF/cfi-zero-addr-delta.s @@ -30,7 +30,7 @@ f: // CHECK-NEXT: Relocations [ // 
CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 1C000000 1C000000 // CHECK-NEXT: 0020: 00000000 04000000 00410E10 410A0E08 // CHECK-NEXT: 0030: 410B0000 00000000 diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s index fd229b6064ad..21be615c5f39 100644 --- a/test/MC/ELF/cfi.s +++ b/test/MC/ELF/cfi.s @@ -234,116 +234,116 @@ f37: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A4C52 00017810 +// CHECK-NEXT: 0000: 14000000 00000000 037A4C52 00017810 // CHECK-NEXT: 0010: 02031B0C 07089001 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 01000000 04000000 00000000 -// CHECK-NEXT: 0030: 20000000 00000000 017A504C 52000178 +// CHECK-NEXT: 0030: 20000000 00000000 037A504C 52000178 // CHECK-NEXT: 0040: 100B0000 00000000 00000003 1B0C0708 // CHECK-NEXT: 0050: 90010000 14000000 28000000 00000000 // CHECK-NEXT: 0060: 01000000 04000000 00000000 14000000 // CHECK-NEXT: 0070: 70000000 00000000 01000000 04000000 -// CHECK-NEXT: 0080: 00000000 20000000 00000000 017A504C +// CHECK-NEXT: 0080: 00000000 20000000 00000000 037A504C // CHECK-NEXT: 0090: 52000178 100B0000 00000000 00000002 // CHECK-NEXT: 00A0: 1B0C0708 90010000 10000000 28000000 // CHECK-NEXT: 00B0: 00000000 01000000 02000000 18000000 -// CHECK-NEXT: 00C0: 00000000 017A5052 00017810 04020000 +// CHECK-NEXT: 00C0: 00000000 037A5052 00017810 04020000 // CHECK-NEXT: 00D0: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 00E0: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 00F0: 00000000 017A5052 00017810 06030000 +// CHECK-NEXT: 00F0: 00000000 037A5052 00017810 06030000 // CHECK-NEXT: 0100: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 0110: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 0120: 00000000 017A5052 00017810 0A040000 +// CHECK-NEXT: 0120: 00000000 037A5052 00017810 0A040000 // 
CHECK-NEXT: 0130: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 0140: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 0150: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0150: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0160: 040A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0170: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0180: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0180: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0190: 060B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 01A0: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 01B0: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 01B0: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 01C0: 0A0C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 01D0: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 01E0: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 01E0: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 01F0: 00017810 0A080000 00000000 00001B0C // CHECK-NEXT: 0200: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 0210: 01000000 00000000 1C000000 00000000 -// CHECK-NEXT: 0220: 017A5052 00017810 0A100000 00000000 +// CHECK-NEXT: 0220: 037A5052 00017810 0A100000 00000000 // CHECK-NEXT: 0230: 00001B0C 07089001 10000000 24000000 // CHECK-NEXT: 0240: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0250: 00000000 017A5052 00017810 04120000 +// CHECK-NEXT: 0250: 00000000 037A5052 00017810 04120000 // CHECK-NEXT: 0260: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 0270: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0280: 00000000 017A5052 00017810 06130000 +// CHECK-NEXT: 0280: 00000000 037A5052 00017810 06130000 // CHECK-NEXT: 0290: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 02A0: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 02B0: 00000000 017A5052 00017810 0A140000 +// CHECK-NEXT: 02B0: 00000000 037A5052 00017810 0A140000 // CHECK-NEXT: 02C0: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 02D0: 24000000 00000000 01000000 
00000000 -// CHECK-NEXT: 02E0: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 02E0: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 02F0: 041A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0300: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0310: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0310: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0320: 061B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 0330: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0340: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0340: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 0350: 0A1C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 0360: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 0370: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 0370: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 0380: 00017810 0A180000 00000000 00001B0C // CHECK-NEXT: 0390: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 03A0: 01000000 00000000 1C000000 00000000 -// CHECK-NEXT: 03B0: 017A5052 00017810 0A800000 00000000 +// CHECK-NEXT: 03B0: 037A5052 00017810 0A800000 00000000 // CHECK-NEXT: 03C0: 00001B0C 07089001 10000000 24000000 // CHECK-NEXT: 03D0: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 03E0: 00000000 017A5052 00017810 04820000 +// CHECK-NEXT: 03E0: 00000000 037A5052 00017810 04820000 // CHECK-NEXT: 03F0: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 0400: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0410: 00000000 017A5052 00017810 06830000 +// CHECK-NEXT: 0410: 00000000 037A5052 00017810 06830000 // CHECK-NEXT: 0420: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 0430: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 0440: 00000000 017A5052 00017810 0A840000 +// CHECK-NEXT: 0440: 00000000 037A5052 00017810 0A840000 // CHECK-NEXT: 0450: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 0460: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 0470: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0470: 18000000 
00000000 037A5052 00017810 // CHECK-NEXT: 0480: 048A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0490: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 04A0: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 04A0: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 04B0: 068B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 04C0: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 04D0: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 04D0: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 04E0: 0A8C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 04F0: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 0500: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 0500: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 0510: 00017810 0A880000 00000000 00001B0C // CHECK-NEXT: 0520: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 0530: 01000000 00000000 1C000000 00000000 -// CHECK-NEXT: 0540: 017A5052 00017810 0A900000 00000000 +// CHECK-NEXT: 0540: 037A5052 00017810 0A900000 00000000 // CHECK-NEXT: 0550: 00001B0C 07089001 10000000 24000000 // CHECK-NEXT: 0560: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0570: 00000000 017A5052 00017810 04920000 +// CHECK-NEXT: 0570: 00000000 037A5052 00017810 04920000 // CHECK-NEXT: 0580: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 0590: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 05A0: 00000000 017A5052 00017810 06930000 +// CHECK-NEXT: 05A0: 00000000 037A5052 00017810 06930000 // CHECK-NEXT: 05B0: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 05C0: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 05D0: 00000000 017A5052 00017810 0A940000 +// CHECK-NEXT: 05D0: 00000000 037A5052 00017810 0A940000 // CHECK-NEXT: 05E0: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 05F0: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 0600: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0600: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0610: 049A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 
0620: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0630: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0630: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0640: 069B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 0650: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0660: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0660: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 0670: 0A9C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 0680: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 0690: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 0690: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 06A0: 00017810 0A980000 00000000 00001B0C // CHECK-NEXT: 06B0: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 06C0: 01000000 00000000 10000000 00000000 -// CHECK-NEXT: 06D0: 017A5200 01781001 1B000000 10000000 +// CHECK-NEXT: 06D0: 037A5200 01781001 1B000000 10000000 // CHECK-NEXT: 06E0: 18000000 00000000 01000000 00000000 // CHECK-NEXT: ) // CHECK-NEXT: } diff --git a/test/MC/Mips/eh-frame.s b/test/MC/Mips/eh-frame.s index 167159885d72..d6b9cf0a5405 100644 --- a/test/MC/Mips/eh-frame.s +++ b/test/MC/Mips/eh-frame.s @@ -31,7 +31,7 @@ func: // MIPS32: 00000000 // Version -// MIPS32: 01 +// MIPS32: 03 // Augmentation String // MIPS32: 7a5200 @@ -67,7 +67,7 @@ func: // MIPS32EL: 00000000 // Version -// MIPS32EL: 01 +// MIPS32EL: 03 // Augmentation String // MIPS32EL: 7a5200 @@ -103,7 +103,7 @@ func: // MIPS64: 00000000 // Version -// MIPS64: 01 +// MIPS64: 03 // Augmentation String // MIPS64: 7a5200 @@ -141,7 +141,7 @@ func: // MIPS64EL: 00000000 // Version -// MIPS64EL: 01 +// MIPS64EL: 03 // Augmentation String // MIPS64EL: 7a5200 diff --git a/test/MC/PowerPC/ppc64-initial-cfa.s b/test/MC/PowerPC/ppc64-initial-cfa.s index ca97e1b96b07..d0bc6b3ecd74 100644 --- a/test/MC/PowerPC/ppc64-initial-cfa.s +++ b/test/MC/PowerPC/ppc64-initial-cfa.s @@ -28,8 +28,8 @@ _proc: # STATIC-NEXT: Relocations [ # STATIC-NEXT: ] # STATIC-NEXT: SectionData ( -# 
STATIC-BE-NEXT: 0000: 00000010 00000000 017A5200 04784101 -# STATIC-LE-NEXT: 0000: 10000000 00000000 017A5200 04784101 +# STATIC-BE-NEXT: 0000: 00000010 00000000 037A5200 04784101 +# STATIC-LE-NEXT: 0000: 10000000 00000000 037A5200 04784101 # STATIC-BE-NEXT: 0010: 1B0C0100 00000010 00000018 00000000 # STATIC-LE-NEXT: 0010: 1B0C0100 10000000 18000000 00000000 # STATIC-BE-NEXT: 0020: 00000004 00000000 @@ -69,8 +69,8 @@ _proc: # PIC-NEXT: Relocations [ # PIC-NEXT: ] # PIC-NEXT: SectionData ( -# PIC-BE-NEXT: 0000: 00000010 00000000 017A5200 04784101 -# PIC-LE-NEXT: 0000: 10000000 00000000 017A5200 04784101 +# PIC-BE-NEXT: 0000: 00000010 00000000 037A5200 04784101 +# PIC-LE-NEXT: 0000: 10000000 00000000 037A5200 04784101 # PIC-BE-NEXT: 0010: 1B0C0100 00000010 00000018 00000000 # PIC-LE-NEXT: 0010: 1B0C0100 10000000 18000000 00000000 # PIC-BE-NEXT: 0020: 00000004 00000000 From 591f9ee07680fb678e4510c94ff65b0d0a48d224 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 19 Jun 2014 15:52:37 +0000 Subject: [PATCH 0034/1155] Emit DWARF info for all code section in an assembly file Currently, when using llvm as an assembler, DWARF debug information is only generated for the .text section. This patch modifies this so that DWARF info is emitted for all executable sections. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211273 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/MapVector.h | 9 ++ include/llvm/MC/MCContext.h | 29 ++--- include/llvm/MC/MCObjectStreamer.h | 4 + include/llvm/MC/MCStreamer.h | 2 + lib/MC/MCContext.cpp | 23 ++++ lib/MC/MCDwarf.cpp | 174 ++++++++++++++++++++++------- lib/MC/MCParser/AsmParser.cpp | 17 +-- lib/MC/MCParser/ELFAsmParser.cpp | 32 ++++-- 8 files changed, 220 insertions(+), 70 deletions(-) diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h index 7fd1570cbf12..8f8b7ba2d7ba 100644 --- a/include/llvm/ADT/MapVector.h +++ b/include/llvm/ADT/MapVector.h @@ -123,6 +123,15 @@ class MapVector { Map.erase(Pos); Vector.pop_back(); } + + /// \brief Remove the element given by Iterator. + /// Returns an iterator to the element following the one which was removed, + /// which may be end(). + typename VectorType::iterator erase(typename VectorType::iterator Iterator) { + typename MapType::iterator MapIterator = Map.find(Iterator->first); + Map.erase(MapIterator); + return Vector.erase(Iterator); + } }; } diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 2f9b32b984ec..ce3a99bb3fe1 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -11,10 +11,12 @@ #define LLVM_MC_MCCONTEXT_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -129,11 +131,10 @@ namespace llvm { /// assembly source files. unsigned GenDwarfFileNumber; - /// The default initial text section that we generate dwarf debugging line - /// info for when generating dwarf assembly source files. - const MCSection *GenDwarfSection; - /// Symbols created for the start and end of this section. 
- MCSymbol *GenDwarfSectionStartSym, *GenDwarfSectionEndSym; + /// Symbols created for the start and end of each section, used for + /// generating the .debug_ranges and .debug_aranges sections. + MapVector > + SectionStartEndSyms; /// The information gathered from labels that will have dwarf label /// entries when generating dwarf assembly source files. @@ -374,16 +375,18 @@ namespace llvm { void setGenDwarfFileNumber(unsigned FileNumber) { GenDwarfFileNumber = FileNumber; } - const MCSection *getGenDwarfSection() { return GenDwarfSection; } - void setGenDwarfSection(const MCSection *Sec) { GenDwarfSection = Sec; } - MCSymbol *getGenDwarfSectionStartSym() { return GenDwarfSectionStartSym; } - void setGenDwarfSectionStartSym(MCSymbol *Sym) { - GenDwarfSectionStartSym = Sym; + MapVector > & + getGenDwarfSectionSyms() { + return SectionStartEndSyms; } - MCSymbol *getGenDwarfSectionEndSym() { return GenDwarfSectionEndSym; } - void setGenDwarfSectionEndSym(MCSymbol *Sym) { - GenDwarfSectionEndSym = Sym; + std::pair >::iterator, + bool> + addGenDwarfSection(const MCSection *Sec) { + return SectionStartEndSyms.insert( + std::make_pair(Sec, std::make_pair(nullptr, nullptr))); } + void finalizeDwarfSections(MCStreamer &MCOS); const std::vector &getMCGenDwarfLabelEntries() const { return MCGenDwarfLabelEntries; } diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index e41a8ba63725..a8ba23ac00f3 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -126,6 +126,10 @@ class MCObjectStreamer : public MCStreamer { void EmitFill(uint64_t NumBytes, uint8_t FillValue) override; void EmitZeros(uint64_t NumBytes) override; void FinishImpl() override; + + virtual bool mayHaveInstructions() const { + return getCurrentSectionData()->hasInstructions(); + } }; } // end namespace llvm diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 2a8367afeb8f..1c6039bf6d4d 100644 --- 
a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -722,6 +722,8 @@ class MCStreamer { virtual void FinishImpl() = 0; /// Finish - Finish emission of machine code. void Finish(); + + virtual bool mayHaveInstructions() const { return true; } }; /// createNullStreamer - Create a dummy machine code streamer, which does diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index cc98be6ea63f..bd2c4e960ac4 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -340,6 +340,29 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) { return !MCDwarfFiles[FileNumber].Name.empty(); } +/// finalizeDwarfSections - Emit end symbols for each non-empty code section. +/// Also remove empty sections from SectionStartEndSyms, to avoid generating +/// useless debug info for them. +void MCContext::finalizeDwarfSections(MCStreamer &MCOS) { + MCContext &context = MCOS.getContext(); + + auto sec = SectionStartEndSyms.begin(); + while (sec != SectionStartEndSyms.end()) { + assert(sec->second.first && "Start symbol must be set by now"); + MCOS.SwitchSection(sec->first); + if (MCOS.mayHaveInstructions()) { + MCSymbol *SectionEndSym = context.CreateTempSymbol(); + MCOS.EmitLabel(SectionEndSym); + sec->second.second = SectionEndSym; + ++sec; + } else { + MapVector >::iterator + to_erase = sec; + sec = SectionStartEndSyms.erase(to_erase); + } + } +} + void MCContext::FatalError(SMLoc Loc, const Twine &Msg) const { // If we have a source manager and a location, use it. Otherwise just // use the generic report_fatal_error(). 
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 1016466839f6..995bee121324 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -518,8 +519,12 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit); MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1); EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4); - EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); - EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1) { + EmitAbbrev(MCOS, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4); + } else { + EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); + EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + } EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string); if (!context.getCompilationDir().empty()) EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string); @@ -552,20 +557,14 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { } // When generating dwarf for assembly source files this emits the data for -// .debug_aranges section. Which contains a header and a table of pairs of -// PointerSize'ed values for the address and size of section(s) with line table -// entries (just the default .text in our case) and a terminating pair of zeros. +// .debug_aranges section. This section contains a header and a table of pairs +// of PointerSize'ed values for the address and size of section(s) with line +// table entries. 
static void EmitGenDwarfAranges(MCStreamer *MCOS, const MCSymbol *InfoSectionSymbol) { MCContext &context = MCOS->getContext(); - // Create a symbol at the end of the section that we are creating the dwarf - // debugging info to use later in here as part of the expression to calculate - // the size of the section for the table. - MCOS->SwitchSection(context.getGenDwarfSection()); - MCSymbol *SectionEndSym = context.CreateTempSymbol(); - MCOS->EmitLabel(SectionEndSym); - context.setGenDwarfSectionEndSym(SectionEndSym); + auto &Sections = context.getGenDwarfSectionSyms(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); @@ -583,8 +582,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, Length += Pad; // Add the size of the pair of PointerSize'ed values for the address and size - // of the one default .text section we have in the table. - Length += 2 * AddrSize; + // of each section we have in the table. + Length += 2 * AddrSize * Sections.size(); // And the pair of terminating zeros. Length += 2 * AddrSize; @@ -608,14 +607,21 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, for(int i = 0; i < Pad; i++) MCOS->EmitIntValue(0, 1); - // Now emit the table of pairs of PointerSize'ed values for the section(s) - // address and size, in our case just the one default .text section. - const MCExpr *Addr = MCSymbolRefExpr::Create( - context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); - const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, - *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0); - MCOS->EmitValue(Addr, AddrSize); - MCOS->EmitAbsValue(Size, AddrSize); + // Now emit the table of pairs of PointerSize'ed values for the section + // addresses and sizes. 
+ for (const auto &sec : Sections) { + MCSymbol* StartSymbol = sec.second.first; + MCSymbol* EndSymbol = sec.second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + const MCExpr *Addr = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, + *StartSymbol, *EndSymbol, 0); + MCOS->EmitValue(Addr, AddrSize); + MCOS->EmitAbsValue(Size, AddrSize); + } // And finally the pair of terminating zeros. MCOS->EmitIntValue(0, AddrSize); @@ -627,7 +633,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, // DIE and a list of label DIEs. static void EmitGenDwarfInfo(MCStreamer *MCOS, const MCSymbol *AbbrevSectionSymbol, - const MCSymbol *LineSectionSymbol) { + const MCSymbol *LineSectionSymbol, + const MCSymbol *RangesSectionSymbol) { MCContext &context = MCOS->getContext(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); @@ -674,15 +681,37 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitIntValue(0, 4); } - // AT_low_pc, the first address of the default .text section. - const MCExpr *Start = MCSymbolRefExpr::Create( - context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); - MCOS->EmitValue(Start, AddrSize); + if (RangesSectionSymbol) { + // There are multiple sections containing code, so we must use the + // .debug_ranges sections. - // AT_high_pc, the last address of the default .text section. - const MCExpr *End = MCSymbolRefExpr::Create( - context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context); - MCOS->EmitValue(End, AddrSize); + // AT_ranges, the 4 byte offset from the start of the .debug_ranges section + // to the address range list for this compilation unit. + MCOS->EmitSymbolValue(RangesSectionSymbol, 4); + } else { + // If we only have one non-empty code section, we can use the simpler + // AT_low_pc and AT_high_pc attributes. 
+ + // Find the first (and only) non-empty text section + auto &Sections = context.getGenDwarfSectionSyms(); + const auto TextSection = Sections.begin(); + assert(TextSection != Sections.end() && "No text section found"); + + MCSymbol* StartSymbol = TextSection->second.first; + MCSymbol* EndSymbol = TextSection->second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + // AT_low_pc, the first address of the default .text section. + const MCExpr *Start = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitValue(Start, AddrSize); + + // AT_high_pc, the last address of the default .text section. + const MCExpr *End = MCSymbolRefExpr::Create( + EndSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitValue(End, AddrSize); + } // AT_name, the name of the source file. Reconstruct from the first directory // and file table entries. @@ -766,13 +795,51 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitLabel(InfoEnd); } +// When generating dwarf for assembly source files this emits the data for +// .debug_ranges section. We only emit one range list, which spans all of the +// executable sections of this file. 
+static void EmitGenDwarfRanges(MCStreamer *MCOS) { + MCContext &context = MCOS->getContext(); + auto &Sections = context.getGenDwarfSectionSyms(); + + const MCAsmInfo *AsmInfo = context.getAsmInfo(); + int AddrSize = AsmInfo->getPointerSize(); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); + + for (const auto sec : Sections) { + + MCSymbol* StartSymbol = sec.second.first; + MCSymbol* EndSymbol = sec.second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + // Emit a base address selection entry for the start of this section + const MCExpr *SectionStartAddr = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitFill(AddrSize, 0xFF); + MCOS->EmitValue(SectionStartAddr, AddrSize); + + // Emit a range list entry spanning this section + const MCExpr *SectionSize = MakeStartMinusEndExpr(*MCOS, + *StartSymbol, *EndSymbol, 0); + MCOS->EmitIntValue(0, AddrSize); + MCOS->EmitAbsValue(SectionSize, AddrSize); + } + + // Emit end of list entry + MCOS->EmitIntValue(0, AddrSize); + MCOS->EmitIntValue(0, AddrSize); +} + // // When generating dwarf for assembly source files this emits the Dwarf // sections. // void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { - // Create the dwarf sections in this order (.debug_line already created). MCContext &context = MCOS->getContext(); + + // Create the dwarf sections in this order (.debug_line already created). 
const MCAsmInfo *AsmInfo = context.getAsmInfo(); bool CreateDwarfSectionSymbols = AsmInfo->doesDwarfUseRelocationsAcrossSections(); @@ -781,6 +848,22 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0); MCSymbol *AbbrevSectionSymbol = nullptr; MCSymbol *InfoSectionSymbol = nullptr; + MCSymbol *RangesSectionSymbol = NULL; + + // Create end symbols for each section, and remove empty sections + MCOS->getContext().finalizeDwarfSections(*MCOS); + + // If there are no sections to generate debug info for, we don't need + // to do anything + if (MCOS->getContext().getGenDwarfSectionSyms().empty()) + return; + + // We only need to use the .debug_ranges section if we have multiple + // code sections. + const bool UseRangesSection = + MCOS->getContext().getGenDwarfSectionSyms().size() > 1; + CreateDwarfSectionSymbols |= UseRangesSection; + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); if (CreateDwarfSectionSymbols) { InfoSectionSymbol = context.CreateTempSymbol(); @@ -791,20 +874,30 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { AbbrevSectionSymbol = context.CreateTempSymbol(); MCOS->EmitLabel(AbbrevSectionSymbol); } - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); + if (UseRangesSection) { + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); + if (CreateDwarfSectionSymbols) { + RangesSectionSymbol = context.CreateTempSymbol(); + MCOS->EmitLabel(RangesSectionSymbol); + } + } - // If there are no line table entries then do not emit any section contents. - if (!context.hasMCLineSections()) - return; + assert((RangesSectionSymbol != NULL) || !UseRangesSection); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); // Output the data for .debug_aranges section. EmitGenDwarfAranges(MCOS, InfoSectionSymbol); + if (UseRangesSection) + EmitGenDwarfRanges(MCOS); + // Output the data for .debug_abbrev section. 
EmitGenDwarfAbbrev(MCOS); // Output the data for .debug_info section. - EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol); + EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol, + RangesSectionSymbol); } // @@ -815,12 +908,13 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { // void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, SourceMgr &SrcMgr, SMLoc &Loc) { - // We won't create dwarf labels for temporary symbols or symbols not in - // the default text. + // We won't create dwarf labels for temporary symbols. if (Symbol->isTemporary()) return; MCContext &context = MCOS->getContext(); - if (context.getGenDwarfSection() != MCOS->getCurrentSection().first) + // We won't create dwarf labels for symbols in sections that we are not + // generating debug info for. + if (!context.getGenDwarfSectionSyms().count(MCOS->getCurrentSection().first)) return; // The dwarf label's name does not have the symbol name's leading diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 1ba02d181dbb..50375d37d841 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -633,10 +633,12 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // If we are generating dwarf for assembly source files save the initial text // section and generate a .file directive. 
if (getContext().getGenDwarfForAssembly()) { - getContext().setGenDwarfSection(getStreamer().getCurrentSection().first); MCSymbol *SectionStartSym = getContext().CreateTempSymbol(); getStreamer().EmitLabel(SectionStartSym); - getContext().setGenDwarfSectionStartSym(SectionStartSym); + auto InsertResult = getContext().addGenDwarfSection( + getStreamer().getCurrentSection().first); + assert(InsertResult.second && ".text section should not have debug info yet"); + InsertResult.first->second.first = SectionStartSym; getContext().setGenDwarfFileNumber(getStreamer().EmitDwarfFileDirective( 0, StringRef(), getContext().getMainFileName())); } @@ -1592,12 +1594,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) { printMessage(IDLoc, SourceMgr::DK_Note, OS.str()); } - // If we are generating dwarf for assembly source files and the current - // section is the initial text section then generate a .loc directive for - // the instruction. + // If we are generating dwarf for the current section then generate a .loc + // directive for the instruction. 
if (!HadError && getContext().getGenDwarfForAssembly() && - getContext().getGenDwarfSection() == - getStreamer().getCurrentSection().first) { + getContext().getGenDwarfSectionSyms().count( + getStreamer().getCurrentSection().first)) { unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); @@ -2029,7 +2030,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, break; if (FAI >= NParameters) { - assert(M && "expected macro to be defined"); + assert(M && "expected macro to be defined"); Error(IDLoc, "parameter named '" + FA.Name + "' does not exist for macro '" + M->Name + "'"); diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 78bb6c7aad0e..98b2b3bd60b2 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -150,7 +150,7 @@ class ELFAsmParser : public MCAsmParserExtension { private: bool ParseSectionName(StringRef &SectionName); - bool ParseSectionArguments(bool IsPush); + bool ParseSectionArguments(bool IsPush, SMLoc loc); unsigned parseSunStyleSectionFlags(); }; @@ -382,7 +382,7 @@ unsigned ELFAsmParser::parseSunStyleSectionFlags() { bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) { getStreamer().PushSection(); - if (ParseSectionArguments(/*IsPush=*/true)) { + if (ParseSectionArguments(/*IsPush=*/true, loc)) { getStreamer().PopSection(); return true; } @@ -397,11 +397,11 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { } // FIXME: This is a work in progress. 
-bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { - return ParseSectionArguments(/*IsPush=*/false); +bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc loc) { + return ParseSectionArguments(/*IsPush=*/false, loc); } -bool ELFAsmParser::ParseSectionArguments(bool IsPush) { +bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { StringRef SectionName; if (ParseSectionName(SectionName)) @@ -545,10 +545,24 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) { } SectionKind Kind = computeSectionKind(Flags, Size); - getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, - Flags, Kind, Size, - GroupName), - Subsection); + const MCSection *ELFSection = getContext().getELFSection( + SectionName, Type, Flags, Kind, Size, GroupName); + getStreamer().SwitchSection(ELFSection, Subsection); + + if (getContext().getGenDwarfForAssembly()) { + auto &Sections = getContext().getGenDwarfSectionSyms(); + auto InsertResult = Sections.insert( + std::make_pair(ELFSection, std::make_pair(nullptr, nullptr))); + if (InsertResult.second) { + if (getContext().getDwarfVersion() <= 2) + Error(loc, "DWARF2 only supports one section per compilation unit"); + + MCSymbol *SectionStartSymbol = getContext().CreateTempSymbol(); + getStreamer().EmitLabel(SectionStartSymbol); + InsertResult.first->second.first = SectionStartSymbol; + } + } + return false; } From 761f33167f2ae180dddcf48abcfcc3ead0c2d219 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 19 Jun 2014 16:17:42 +0000 Subject: [PATCH 0035/1155] Kill the LLVM global lock. This patch removes the LLVM global lock, and updates all existing users of the global lock to use their own mutex. None of the existing users of the global lock were protecting code that was mutually exclusive with any of the other users of the global lock, so its purpose was not being met. 
Reviewed by: rnk Differential Revision: http://reviews.llvm.org/D4142 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211277 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/ProgrammersManual.rst | 5 ----- include/llvm/Support/Threading.h | 8 -------- lib/Support/ManagedStatic.cpp | 23 ++++++++++++++++++++--- lib/Support/Threading.cpp | 12 ------------ lib/Support/Timer.cpp | 4 ++-- 5 files changed, 22 insertions(+), 30 deletions(-) diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 4973e5c66719..df3795a34d33 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -2226,11 +2226,6 @@ Note that, because no other threads are allowed to issue LLVM API calls before ``llvm_start_multithreaded()`` returns, it is possible to have ``ManagedStatic``\ s of ``llvm::sys::Mutex``\ s. -The ``llvm_acquire_global_lock()`` and ``llvm_release_global_lock`` APIs provide -access to the global lock used to implement the double-checked locking for lazy -initialization. These should only be used internally to LLVM, and only if you -know what you're doing! - .. _llvmcontext: Achieving Isolation with ``LLVMContext`` diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h index a7e8774558d5..d84d11867958 100644 --- a/include/llvm/Support/Threading.h +++ b/include/llvm/Support/Threading.h @@ -33,14 +33,6 @@ namespace llvm { /// mode or not. bool llvm_is_multithreaded(); - /// acquire_global_lock - Acquire the global lock. This is a no-op if called - /// before llvm_start_multithreaded(). - void llvm_acquire_global_lock(); - - /// release_global_lock - Release the global lock. This is a no-op if called - /// before llvm_start_multithreaded(). - void llvm_release_global_lock(); - /// llvm_execute_on_thread - Execute the given \p UserFn on a separate /// thread, passing it the provided \p UserData. 
/// diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 6a1c2a545a8d..9d7e99f97fb5 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -15,15 +15,32 @@ #include "llvm/Config/config.h" #include "llvm/Support/Atomic.h" #include +#include using namespace llvm; static const ManagedStaticBase *StaticList = nullptr; +// ManagedStatics can get created during execution of static constructors. As a +// result, we cannot use a global static std::mutex object for the lock since it +// may not have been constructed. Instead, we do a call-once initialization of +// a pointer to a mutex. +static std::once_flag MutexInitializationFlag; +static std::recursive_mutex* ManagedStaticMutex = nullptr; + +// Not all supported platforms (in particular VS2012) have thread-safe function +// static initialization, so roll our own. +static std::recursive_mutex& GetManagedStaticMutex() { + std::call_once(MutexInitializationFlag, + []() { ManagedStaticMutex = new std::recursive_mutex(); } ); + + return *ManagedStaticMutex; +} + void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { assert(Creator); if (llvm_is_multithreaded()) { - llvm_acquire_global_lock(); + std::lock_guard Lock(GetManagedStaticMutex()); if (!Ptr) { void* tmp = Creator(); @@ -43,8 +60,6 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), Next = StaticList; StaticList = this; } - - llvm_release_global_lock(); } else { assert(!Ptr && !DeleterFn && !Next && "Partially initialized ManagedStatic!?"); @@ -75,6 +90,8 @@ void ManagedStaticBase::destroy() const { /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. 
void llvm::llvm_shutdown() { + std::lock_guard Lock(GetManagedStaticMutex()); + while (StaticList) StaticList->destroy(); diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 1acfa79b11d5..2358dde0e6c5 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -21,13 +21,10 @@ using namespace llvm; static bool multithreaded_mode = false; -static sys::Mutex* global_lock = nullptr; - bool llvm::llvm_start_multithreaded() { #if LLVM_ENABLE_THREADS != 0 assert(!multithreaded_mode && "Already multithreaded!"); multithreaded_mode = true; - global_lock = new sys::Mutex(true); // We fence here to ensure that all initialization is complete BEFORE we // return from llvm_start_multithreaded(). @@ -47,7 +44,6 @@ void llvm::llvm_stop_multithreaded() { sys::MemoryFence(); multithreaded_mode = false; - delete global_lock; #endif } @@ -55,14 +51,6 @@ bool llvm::llvm_is_multithreaded() { return multithreaded_mode; } -void llvm::llvm_acquire_global_lock() { - if (multithreaded_mode) global_lock->acquire(); -} - -void llvm::llvm_release_global_lock() { - if (multithreaded_mode) global_lock->release(); -} - #if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H) #include diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 61465ae5e8be..210bda754e74 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -84,14 +85,13 @@ static TimerGroup *getDefaultTimerGroup() { sys::MemoryFence(); if (tmp) return tmp; - llvm_acquire_global_lock(); + sys::SmartScopedLock Lock(*TimerLock); tmp = DefaultTimerGroup; if (!tmp) { tmp = new TimerGroup("Miscellaneous Ungrouped Timers"); sys::MemoryFence(); DefaultTimerGroup = tmp; } - llvm_release_global_lock(); return tmp; } From 
b7509c63e9e5739ce2fec27579648df3ff2ac249 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 19 Jun 2014 16:35:19 +0000 Subject: [PATCH 0036/1155] Tests for r211273 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211279 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/ARM/dwarf-asm-multiple-sections.s | 79 +++++++++++++++++++++ test/MC/ARM/dwarf-asm-no-code.s | 27 +++++++ test/MC/ARM/dwarf-asm-nonstandard-section.s | 57 +++++++++++++++ test/MC/ARM/dwarf-asm-single-section.s | 56 +++++++++++++++ 4 files changed, 219 insertions(+) create mode 100644 test/MC/ARM/dwarf-asm-multiple-sections.s create mode 100644 test/MC/ARM/dwarf-asm-no-code.s create mode 100644 test/MC/ARM/dwarf-asm-nonstandard-section.s create mode 100644 test/MC/ARM/dwarf-asm-single-section.s diff --git a/test/MC/ARM/dwarf-asm-multiple-sections.s b/test/MC/ARM/dwarf-asm-multiple-sections.s new file mode 100644 index 000000000000..5e64fcc023ff --- /dev/null +++ b/test/MC/ARM/dwarf-asm-multiple-sections.s @@ -0,0 +1,79 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s +// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 2 2>&1 | FileCheck -check-prefix VERSION %s +// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 1 2>&1 | FileCheck -check-prefix DWARF1 %s +// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 5 2>&1 | FileCheck -check-prefix DWARF5 %s + .section .text, "ax" +a: + mov r0, r0 + + .section foo, "ax" +b: + mov r1, r1 + +// DWARF: .debug_abbrev contents: +// DWARF: Abbrev table for offset: 0x00000000 +// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes +// DWARF: DW_AT_stmt_list DW_FORM_data4 +// DWARF: DW_AT_ranges DW_FORM_data4 +// DWARF: DW_AT_name DW_FORM_string +// DWARF: 
DW_AT_comp_dir DW_FORM_string +// DWARF: DW_AT_producer DW_FORM_string +// DWARF: DW_AT_language DW_FORM_data2 + +// DWARF: .debug_info contents: +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1] +// CHECK-NOT-DWARF: DW_TAG_ +// DWARF: DW_AT_ranges [DW_FORM_data4] (0x00000000) + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("a") + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("b") + + +// DWARF: .debug_aranges contents: +// DWARF-NEXT: Address Range Header: length = 0x00000024, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00 +// DWARF-NEXT: [0x00000000 - 0x00000004) +// DWARF-NEXT: [0x00000000 - 0x00000004) + + +// DWARF: .debug_line contents: +// DWARF: 0x0000000000000000 9 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 9 0 1 0 0 is_stmt end_sequence +// DWARF-NEXT: 0x0000000000000000 13 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 13 0 1 0 0 is_stmt end_sequence + + +// DWARF: .debug_ranges contents: +// DWARF: 00000000 ffffffff 00000000 +// DWARF: 00000000 00000000 00000004 +// DWARF: 00000000 ffffffff 00000000 +// DWARF: 00000000 00000000 00000004 +// DWARF: 00000000 + + + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev +// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line +// RELOC-NEXT: 00000010 R_ARM_ABS32 .debug_ranges +// RELOC-NEXT: 0000004f R_ARM_ABS32 .text +// RELOC-NEXT: 00000061 R_ARM_ABS32 foo + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_ranges]: +// RELOC-NEXT: 00000004 R_ARM_ABS32 .text +// RELOC-NEXT: 00000014 R_ARM_ABS32 foo + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info +// RELOC-NEXT: 00000010 R_ARM_ABS32 .text +// RELOC-NEXT: 00000018 R_ARM_ABS32 foo + + +// VERSION: {{.*}} error: DWARF2 only supports one section per compilation unit + +// DWARF1: Dwarf version 1 is not supported. 
+// DWARF5: Dwarf version 5 is not supported. diff --git a/test/MC/ARM/dwarf-asm-no-code.s b/test/MC/ARM/dwarf-asm-no-code.s new file mode 100644 index 000000000000..7d06a4190091 --- /dev/null +++ b/test/MC/ARM/dwarf-asm-no-code.s @@ -0,0 +1,27 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s + +// If there is no code in an assembly file, no debug info is produced + +.section .data, "aw" +a: +.long 42 + +// DWARF: .debug_abbrev contents: +// DWARF-NEXT: < EMPTY > + +// DWARF: .debug_info contents: + +// DWARF: .debug_aranges contents: + +// DWARF: .debug_line contents: + +// DWARF: .debug_ranges contents: + + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_info]: + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_aranges]: diff --git a/test/MC/ARM/dwarf-asm-nonstandard-section.s b/test/MC/ARM/dwarf-asm-nonstandard-section.s new file mode 100644 index 000000000000..adf92bab18d2 --- /dev/null +++ b/test/MC/ARM/dwarf-asm-nonstandard-section.s @@ -0,0 +1,57 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s + + .section foo, "ax" +b: + mov r1, r1 + +// DWARF: .debug_abbrev contents: +// DWARF: Abbrev table for offset: 0x00000000 +// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes +// DWARF: DW_AT_stmt_list DW_FORM_data4 +// DWARF: DW_AT_low_pc DW_FORM_addr +// DWARF: DW_AT_high_pc DW_FORM_addr +// DWARF: DW_AT_name DW_FORM_string +// DWARF: DW_AT_comp_dir DW_FORM_string +// DWARF: DW_AT_producer DW_FORM_string +// DWARF: DW_AT_language DW_FORM_data2 + +// DWARF: .debug_info contents: +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1] +// DWARF-NOT: 
DW_TAG_ +// DWARF: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) +// DWARF: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000004) + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("b") + + +// DWARF: .debug_aranges contents: +// DWARF-NEXT: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00 +// DWARF-NEXT: [0x00000000 - 0x00000004) + + +// DWARF: .debug_line contents: +// DWARF: 0x0000000000000000 7 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 7 0 1 0 0 is_stmt end_sequence + + +// DWARF: .debug_ranges contents: +// DWARF-NOT: {{0-9a-f}} +// DWARF: .debug_pubnames contents: + + + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev +// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line +// RELOC-NEXT: 00000010 R_ARM_ABS32 foo +// RELOC-NEXT: 00000014 R_ARM_ABS32 foo +// RELOC-NEXT: 00000053 R_ARM_ABS32 foo + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info +// RELOC-NEXT: 00000010 R_ARM_ABS32 foo diff --git a/test/MC/ARM/dwarf-asm-single-section.s b/test/MC/ARM/dwarf-asm-single-section.s new file mode 100644 index 000000000000..d69dfab5dee5 --- /dev/null +++ b/test/MC/ARM/dwarf-asm-single-section.s @@ -0,0 +1,56 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s + + .section .text, "ax" +a: + mov r0, r0 + + +// DWARF: .debug_abbrev contents: +// DWARF: Abbrev table for offset: 0x00000000 +// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes +// DWARF: DW_AT_stmt_list DW_FORM_data4 +// DWARF: DW_AT_low_pc DW_FORM_addr +// DWARF: DW_AT_high_pc DW_FORM_addr +// DWARF: DW_AT_name DW_FORM_string +// DWARF: DW_AT_comp_dir 
DW_FORM_string +// DWARF: DW_AT_producer DW_FORM_string +// DWARF: DW_AT_language DW_FORM_data2 + +// DWARF: .debug_info contents: +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1] +// CHECK-NOT-DWARF: DW_TAG_ +// DWARF: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) +// DWARF: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000004) + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("a") + + +// DWARF: .debug_aranges contents: +// DWARF-NEXT: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00 +// DWARF-NEXT: [0x00000000 - 0x00000004) + +// DWARF: .debug_line contents: +// DWARF: 0x0000000000000000 7 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 7 0 1 0 0 is_stmt end_sequence + + +// DWARF: .debug_ranges contents: +// DWARF-NOT: {{0-9a-f}} +// DWARF: .debug_pubnames contents: + + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev +// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line +// RELOC-NEXT: 00000010 R_ARM_ABS32 .text +// RELOC-NEXT: 00000014 R_ARM_ABS32 .text +// RELOC-NEXT: 00000053 R_ARM_ABS32 .text + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info +// RELOC-NEXT: 00000010 R_ARM_ABS32 .text From e4d0a5ec1841ac5a407c3a07b62749923dda74c2 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Thu, 19 Jun 2014 16:50:16 +0000 Subject: [PATCH 0037/1155] [ValueTracking] Extend range metadata to call/invoke Summary: With this patch, range metadata can be added to call/invoke including IntrinsicInst. Previously, it could only be added to load. Rename computeKnownBitsLoad to computeKnownBitsFromRangeMetadata because range metadata is not only used by load. Update the language reference to reflect this change. 
Test Plan: Add several tests in range-2.ll to confirm the verifier is happy with having range metadata on call/invoke. Add two tests in AddOverFlow.ll to confirm annotating range metadata to call/invoke can benefit InstCombine. Reviewers: meheff, nlewycky, reames, hfinkel, eliben Reviewed By: eliben Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D4187 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211281 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 16 +++++---- include/llvm/Analysis/ValueTracking.h | 5 ++- lib/Analysis/ValueTracking.cpp | 17 ++++++--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- lib/IR/Verifier.cpp | 3 +- test/Transforms/InstCombine/AddOverFlow.ll | 38 ++++++++++++++++++++ test/Transforms/InstCombine/add2.ll | 41 ++++++++++++++++++++++ test/Verifier/range-1.ll | 2 +- test/Verifier/range-2.ll | 30 ++++++++++++++++ 9 files changed, 138 insertions(+), 16 deletions(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index cb94d3967b13..b75058fa9a5f 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -2749,11 +2749,12 @@ number representing the maximum relative error, for example: '``range``' Metadata ^^^^^^^^^^^^^^^^^^^^ -``range`` metadata may be attached only to loads of integer types. It -expresses the possible ranges the loaded value is in. The ranges are -represented with a flattened list of integers. The loaded value is known -to be in the union of the ranges defined by each consecutive pair. Each -pair has the following properties: +``range`` metadata may be attached only to ``load``, ``call`` and ``invoke`` of +integer types. It expresses the possible ranges the loaded value or the value +returned by the called function at this call site is in. The ranges are +represented with a flattened list of integers. The loaded value or the value +returned is known to be in the union of the ranges defined by each consecutive +pair. 
Each pair has the following properties: - The type must match the type loaded by the instruction. - The pair ``a,b`` represents the range ``[a,b)``. @@ -2771,8 +2772,9 @@ Examples: %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1 %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1 - %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5 - %d = load i8* %z, align 1, !range !3 ; Can only be -2, -1, 3, 4 or 5 + %c = call i8 @foo(), !range !2 ; Can only be 0, 1, 3, 4 or 5 + %d = invoke i8 @bar() to label %cont + unwind label %lpad, !range !3 ; Can only be -2, -1, 3, 4 or 5 ... !0 = metadata !{ i8 0, i8 2 } !1 = metadata !{ i8 255, i8 2 } diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index ce7896738d8c..83b5408fb1c2 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -37,7 +37,10 @@ namespace llvm { /// for all of the elements in the vector. void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout *TD = nullptr, unsigned Depth = 0); - void computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero); + /// Compute known bits from the range metadata. + /// \p KnownZero the set of bits that are known to be zero + void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, + APInt &KnownZero); /// ComputeSignBit - Determine whether the sign bit is known to be zero or /// one. Convenience wrapper around computeKnownBits. 
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 4f4875357828..9de945ed7016 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -188,7 +188,8 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, KnownOne.setBit(BitWidth - 1); } -void llvm::computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero) { +void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, + APInt &KnownZero) { unsigned BitWidth = KnownZero.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); @@ -338,7 +339,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, default: break; case Instruction::Load: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsLoad(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -733,6 +734,12 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Call: + case Instruction::Invoke: + if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) + computeKnownBitsFromRangeMetadata(*MD, KnownZero); + // If a range metadata is attached to this IntrinsicInst, intersect the + // explicit range specified by the metadata and the implicit range of + // the intrinsic. if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; @@ -742,16 +749,16 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // If this call is undefined for 0, the result will be less than 2^n. 
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) LowBits -= 1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::ctpop: { unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero = APInt::getHighBitsSet(64, 32); + KnownZero |= APInt::getHighBitsSet(64, 32); break; } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 639eb462e7ff..5923f0e208e1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2084,7 +2084,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeKnownBitsLoad(*Ranges, KnownZero); + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); } break; } diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index d1c7f7d25c39..ad3c29c564e7 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -2233,7 +2233,8 @@ void Verifier::visitInstruction(Instruction &I) { } MDNode *MD = I.getMetadata(LLVMContext::MD_range); - Assert1(!MD || isa(I), "Ranges are only for loads!", &I); + Assert1(!MD || isa(I) || isa(I) || isa(I), + "Ranges are only for loads, calls and invokes!", &I); InstsInThisBlock.insert(&I); } diff --git a/test/Transforms/InstCombine/AddOverFlow.ll b/test/Transforms/InstCombine/AddOverFlow.ll index 70178ec83b6e..8f3d429c8d60 100644 --- a/test/Transforms/InstCombine/AddOverFlow.ll +++ b/test/Transforms/InstCombine/AddOverFlow.ll @@ -34,6 +34,44 @@ define i16 @zero_sign_bit2(i16 %a, i16 %b) { ret i16 %3 } +declare i16 @bounded(i16 %input); 
+declare i32 @__gxx_personality_v0(...); +!0 = metadata !{i16 0, i16 32768} ; [0, 32767] +!1 = metadata !{i16 0, i16 32769} ; [0, 32768] + +define i16 @add_bounded_values(i16 %a, i16 %b) { +; CHECK-LABEL: @add_bounded_values( +entry: + %c = call i16 @bounded(i16 %a), !range !0 + %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !0 +cont: +; %c and %d are in [0, 32767]. Therefore, %c + %d doesn't unsigned overflow. + %e = add i16 %c, %d +; CHECK: add nuw i16 %c, %d + ret i16 %e +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret i16 42 +} + +define i16 @add_bounded_values_2(i16 %a, i16 %b) { +; CHECK-LABEL: @add_bounded_values_2( +entry: + %c = call i16 @bounded(i16 %a), !range !1 + %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !1 +cont: +; Similar to add_bounded_values, but %c and %d are in [0, 32768]. Therefore, +; %c + %d may unsigned overflow and we cannot add NUW. + %e = add i16 %c, %d +; CHECK: add i16 %c, %d + ret i16 %e +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret i16 42 +} + ; CHECK-LABEL: @ripple_nsw1 ; CHECK: add nsw i16 %a, %b define i16 @ripple_nsw1(i16 %x, i16 %y) { diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll index 0328c4ff6e3f..821fce0dabcc 100644 --- a/test/Transforms/InstCombine/add2.ll +++ b/test/Transforms/InstCombine/add2.ll @@ -217,3 +217,44 @@ define i32 @mul_add_to_mul_6(i32 %x, i32 %y) { ; CHECK-NEXT: %add = mul nsw i32 %mul1, 6 ; CHECK-NEXT: ret i32 %add } + +; This test and the next test verify that when a range metadata is attached to +; llvm.cttz, ValueTracking correctly intersects the range specified by the +; metadata and the range implied by the intrinsic. +; +; In this test, the range specified by the metadata is more strict. 
Therefore, +; ValueTracking uses that range. +define i16 @add_cttz(i16 %a) { +; CHECK-LABEL: @add_cttz( + ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned + ; is in [0, 16). The range metadata indicates the value returned is in [0, 8). + ; Intersecting these ranges, we know the value returned is in [0, 8). + ; Therefore, InstCombine will transform + ; add %cttz, 1111 1111 1111 1000 ; decimal -8 + ; to + ; or %cttz, 1111 1111 1111 1000 + %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !0 + %b = add i16 %cttz, -8 +; CHECK: or i16 %cttz, -8 + ret i16 %b +} +declare i16 @llvm.cttz.i16(i16, i1) +!0 = metadata !{i16 0, i16 8} + +; Similar to @add_cttz, but in this test, the range implied by the +; intrinsic is more strict. Therefore, ValueTracking uses that range. +define i16 @add_cttz_2(i16 %a) { +; CHECK-LABEL: @add_cttz_2( + ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned + ; is in [0, 16). The range metadata indicates the value returned is in + ; [0, 32). Intersecting these ranges, we know the value returned is in + ; [0, 16). Therefore, InstCombine will transform + ; add %cttz, 1111 1111 1111 0000 ; decimal -16 + ; to + ; or %cttz, 1111 1111 1111 0000 + %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !1 + %b = add i16 %cttz, -16 +; CHECK: or i16 %cttz, -16 + ret i16 %b +} +!1 = metadata !{i16 0, i16 32} diff --git a/test/Verifier/range-1.ll b/test/Verifier/range-1.ll index b6a75d13bba0..f15ca3f74065 100644 --- a/test/Verifier/range-1.ll +++ b/test/Verifier/range-1.ll @@ -6,7 +6,7 @@ entry: ret void } !0 = metadata !{i8 0, i8 1} -; CHECK: Ranges are only for loads! +; CHECK: Ranges are only for loads, calls and invokes! 
; CHECK-NEXT: store i8 0, i8* %x, align 1, !range !0 define i8 @f2(i8* %x) { diff --git a/test/Verifier/range-2.ll b/test/Verifier/range-2.ll index 8d85d1915195..1d2e0575d76a 100644 --- a/test/Verifier/range-2.ll +++ b/test/Verifier/range-2.ll @@ -34,3 +34,33 @@ entry: ret i8 %y } !4 = metadata !{i8 -1, i8 0, i8 1, i8 -2} + +; We can annotate the range of the return value of a CALL. +define void @call_all(i8* %x) { +entry: + %v1 = call i8 @f1(i8* %x), !range !0 + %v2 = call i8 @f2(i8* %x), !range !1 + %v3 = call i8 @f3(i8* %x), !range !2 + %v4 = call i8 @f4(i8* %x), !range !3 + %v5 = call i8 @f5(i8* %x), !range !4 + ret void +} + +; We can annotate the range of the return value of an INVOKE. +define void @invoke_all(i8* %x) { +entry: + %v1 = invoke i8 @f1(i8* %x) to label %cont unwind label %lpad, !range !0 + %v2 = invoke i8 @f2(i8* %x) to label %cont unwind label %lpad, !range !1 + %v3 = invoke i8 @f3(i8* %x) to label %cont unwind label %lpad, !range !2 + %v4 = invoke i8 @f4(i8* %x) to label %cont unwind label %lpad, !range !3 + %v5 = invoke i8 @f5(i8* %x) to label %cont unwind label %lpad, !range !4 + +cont: + ret void + +lpad: + %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret void +} +declare i32 @__gxx_personality_v0(...) From 36a321160e61149088f306c3605a9ad5fdd67e0a Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 17:15:36 +0000 Subject: [PATCH 0038/1155] MCNullStreamer: assign file IDs to resolve crashes and errors Use the MCStreamer base implementations for file ID tracking instead of overriding them as no-ops. Avoids assertions when streaming Dwarf debug info, and fixes ASM parsing of loc and file directives. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211282 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCNullStreamer.cpp | 10 +--------- test/DebugInfo/global.ll | 3 +++ test/MC/AsmParser/directive_file.s | 1 + test/MC/AsmParser/directive_line.s | 1 + test/MC/AsmParser/directive_loc.s | 1 + 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 4f2740ed3ae9..5ab2829a6ba3 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -81,15 +81,7 @@ namespace { unsigned char Value = 0) override { return false; } void EmitFileDirective(StringRef Filename) override {} - unsigned EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename, - unsigned CUID = 0) override { - return 0; - } - void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, - unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator, - StringRef FileName) override {} + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo&) override {} void EmitBundleAlignMode(unsigned AlignPow2) override {} diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll index c515114fd50b..3c97f0cb2279 100644 --- a/test/DebugInfo/global.ll +++ b/test/DebugInfo/global.ll @@ -3,6 +3,9 @@ ; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t ; RUN: llvm-dwarfdump %t | FileCheck %s +; Also test that the null streamer doesn't crash with debug info. 
+; RUN: %llc_dwarf -O0 -filetype=null < %s + ; generated from the following source compiled to bitcode with clang -g -O1 ; static int i; ; int main() { diff --git a/test/MC/AsmParser/directive_file.s b/test/MC/AsmParser/directive_file.s index 9b99e0f24e99..d7290ebe1dbe 100644 --- a/test/MC/AsmParser/directive_file.s +++ b/test/MC/AsmParser/directive_file.s @@ -1,4 +1,5 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s +# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null .file "hello" .file 1 "worl\144" # "\144" is "d" diff --git a/test/MC/AsmParser/directive_line.s b/test/MC/AsmParser/directive_line.s index 94ce44602998..110b68a46216 100644 --- a/test/MC/AsmParser/directive_line.s +++ b/test/MC/AsmParser/directive_line.s @@ -1,4 +1,5 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s +# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null # FIXME: Actually test the output. .line diff --git a/test/MC/AsmParser/directive_loc.s b/test/MC/AsmParser/directive_loc.s index cda9579fb272..404ebcecdd0a 100644 --- a/test/MC/AsmParser/directive_loc.s +++ b/test/MC/AsmParser/directive_loc.s @@ -1,4 +1,5 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s +# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null .file 1 "hello" # CHECK: .file 1 "hello" From 6072096fd63c1996cfe731ecddcb6b3232e37b70 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 17:59:14 +0000 Subject: [PATCH 0039/1155] DebugInfo: Fission: Ensure the address pool entries for location lists are emitted. The address pool was being emitted before location lists. The latter could add more entries to the pool which would be lost/never emitted. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211284 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- test/DebugInfo/X86/fission-ranges.ll | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 3847eb124f3d..f78ca2c03b4e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1038,9 +1038,9 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); + emitDebugLocDWO(); // Emit DWO addresses. AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); - emitDebugLocDWO(); } else // Emit info into a debug loc section. emitDebugLoc(); diff --git a/test/DebugInfo/X86/fission-ranges.ll b/test/DebugInfo/X86/fission-ranges.ll index 057039c3c5cc..135837582fcc 100644 --- a/test/DebugInfo/X86/fission-ranges.ll +++ b/test/DebugInfo/X86/fission-ranges.ll @@ -44,6 +44,13 @@ ; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo) ; HDR-NOT: .rela.{{.*}}.dwo +; Make sure we have enough stuff in the debug_addr to cover the address indexes +; (6 is the last index in debug_loc.dwo, making 7 entries of 8 bytes each, 7 * 8 +; == 56 base 10 == 38 base 16) + +; HDR: .debug_addr 00000038 +; HDR-NOT: .rela.{{.*}}.dwo + ; From the code: ; extern int c; From a4d0ff9cd1fb3fe4c3a58d0747dc523f7d9e9e3f Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 19 Jun 2014 18:18:23 +0000 Subject: [PATCH 0040/1155] Remove support for LLVM runtime multi-threading. After a number of previous small iterations, the functions llvm_start_multithreaded() and llvm_stop_multithreaded() have been reduced essentially to no-ops. This change removes them entirely. 
Reviewed by: rnk, dblaikie Differential Revision: http://reviews.llvm.org/D4216 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211287 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/ProgrammersManual.rst | 45 +++------------------------- include/llvm-c/Core.h | 13 ++++---- include/llvm/Support/ManagedStatic.h | 3 -- include/llvm/Support/Threading.h | 23 ++++---------- lib/IR/Core.cpp | 3 +- lib/Support/Threading.cpp | 29 ++---------------- unittests/Support/ManagedStatic.cpp | 2 -- 7 files changed, 19 insertions(+), 99 deletions(-) diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index df3795a34d33..a7b28b36ca1d 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -2170,46 +2170,13 @@ compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and using the resultant compiler to build a copy of LLVM with multithreading support. -.. _startmultithreaded: - -Entering and Exiting Multithreaded Mode ---------------------------------------- - -In order to properly protect its internal data structures while avoiding -excessive locking overhead in the single-threaded case, the LLVM must intialize -certain data structures necessary to provide guards around its internals. To do -so, the client program must invoke ``llvm_start_multithreaded()`` before making -any concurrent LLVM API calls. To subsequently tear down these structures, use -the ``llvm_stop_multithreaded()`` call. You can also use the -``llvm_is_multithreaded()`` call to check the status of multithreaded mode. - -Note that both of these calls must be made *in isolation*. That is to say that -no other LLVM API calls may be executing at any time during the execution of -``llvm_start_multithreaded()`` or ``llvm_stop_multithreaded``. It is the -client's responsibility to enforce this isolation. - -The return value of ``llvm_start_multithreaded()`` indicates the success or -failure of the initialization. 
Failure typically indicates that your copy of -LLVM was built without multithreading support, typically because GCC atomic -intrinsics were not found in your system compiler. In this case, the LLVM API -will not be safe for concurrent calls. However, it *will* be safe for hosting -threaded applications in the JIT, though :ref:`care must be taken -` to ensure that side exits and the like do not accidentally -result in concurrent LLVM API calls. - .. _shutdown: Ending Execution with ``llvm_shutdown()`` ----------------------------------------- When you are done using the LLVM APIs, you should call ``llvm_shutdown()`` to -deallocate memory used for internal structures. This will also invoke -``llvm_stop_multithreaded()`` if LLVM is operating in multithreaded mode. As -such, ``llvm_shutdown()`` requires the same isolation guarantees as -``llvm_stop_multithreaded()``. - -Note that, if you use scope-based shutdown, you can use the -``llvm_shutdown_obj`` class, which calls ``llvm_shutdown()`` in its destructor. +deallocate memory used for internal structures. .. _managedstatic: @@ -2217,15 +2184,11 @@ Lazy Initialization with ``ManagedStatic`` ------------------------------------------ ``ManagedStatic`` is a utility class in LLVM used to implement static -initialization of static resources, such as the global type tables. Before the -invocation of ``llvm_shutdown()``, it implements a simple lazy initialization -scheme. Once ``llvm_start_multithreaded()`` returns, however, it uses +initialization of static resources, such as the global type tables. In a +single-threaded environment, it implements a simple lazy initialization scheme. +When LLVM is compiled with support for multi-threading, however, it uses double-checked locking to implement thread-safe lazy initialization. -Note that, because no other threads are allowed to issue LLVM API calls before -``llvm_start_multithreaded()`` returns, it is possible to have -``ManagedStatic``\ s of ``llvm::sys::Mutex``\ s. - .. 
_llvmcontext: Achieving Isolation with ``LLVMContext`` diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 0e78ed71fa9a..8693a3020aba 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -2848,16 +2848,13 @@ void LLVMDisposePassManager(LLVMPassManagerRef PM); * @{ */ -/** Allocate and initialize structures needed to make LLVM safe for - multithreading. The return value indicates whether multithreaded - initialization succeeded. Must be executed in isolation from all - other LLVM api calls. - @see llvm::llvm_start_multithreaded */ +/** Deprecated: Multi-threading can only be enabled/disabled with the compile + time define LLVM_ENABLE_THREADS. This function always returns + LLVMIsMultithreaded(). */ LLVMBool LLVMStartMultithreaded(void); -/** Deallocate structures necessary to make LLVM safe for multithreading. - Must be executed in isolation from all other LLVM api calls. - @see llvm::llvm_stop_multithreaded */ +/** Deprecated: Multi-threading can only be enabled/disabled with the compile + time define LLVM_ENABLE_THREADS. */ void LLVMStopMultithreaded(void); /** Check whether LLVM is executing in thread-safe mode or not. diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h index 1bb8cea092f9..d8fbfeb8e20c 100644 --- a/include/llvm/Support/ManagedStatic.h +++ b/include/llvm/Support/ManagedStatic.h @@ -103,9 +103,6 @@ void llvm_shutdown(); /// llvm_shutdown() when it is destroyed. 
struct llvm_shutdown_obj { llvm_shutdown_obj() { } - explicit llvm_shutdown_obj(bool multithreaded) { - if (multithreaded) llvm_start_multithreaded(); - } ~llvm_shutdown_obj() { llvm_shutdown(); } }; diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h index d84d11867958..981971bd1e86 100644 --- a/include/llvm/Support/Threading.h +++ b/include/llvm/Support/Threading.h @@ -7,30 +7,19 @@ // //===----------------------------------------------------------------------===// // -// TThis file defines llvm_start_multithreaded() and friends. +// This file declares helper functions for running LLVM in a multi-threaded +// environment. // //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_THREADING_H #define LLVM_SUPPORT_THREADING_H -namespace llvm { - /// llvm_start_multithreaded - Allocate and initialize structures needed to - /// make LLVM safe for multithreading. The return value indicates whether - /// multithreaded initialization succeeded. LLVM will still be operational - /// on "failed" return, and will still be safe for hosting threading - /// applications in the JIT, but will not be safe for concurrent calls to the - /// LLVM APIs. - /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS. - bool llvm_start_multithreaded(); - - /// llvm_stop_multithreaded - Deallocate structures necessary to make LLVM - /// safe for multithreading. - /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS. - void llvm_stop_multithreaded(); +#include "llvm/Support/Mutex.h" - /// llvm_is_multithreaded - Check whether LLVM is executing in thread-safe - /// mode or not. +namespace llvm { + /// Returns true if LLVM is compiled with support for multi-threading, and + /// false otherwise. 
bool llvm_is_multithreaded(); /// llvm_execute_on_thread - Execute the given \p UserFn on a separate diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 197b6cb9054e..68b9baa2baa2 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -2702,11 +2702,10 @@ void LLVMDisposePassManager(LLVMPassManagerRef PM) { /*===-- Threading ------------------------------------------------------===*/ LLVMBool LLVMStartMultithreaded() { - return llvm_start_multithreaded(); + return LLVMIsMultithreaded(); } void LLVMStopMultithreaded() { - llvm_stop_multithreaded(); } LLVMBool LLVMIsMultithreaded() { diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 2358dde0e6c5..ca7f3f64aa37 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements llvm_start_multithreaded() and friends. +// This file defines helper functions for running LLVM in a multi-threaded +// environment. // //===----------------------------------------------------------------------===// @@ -19,38 +20,14 @@ using namespace llvm; -static bool multithreaded_mode = false; - -bool llvm::llvm_start_multithreaded() { +bool llvm::llvm_is_multithreaded() { #if LLVM_ENABLE_THREADS != 0 - assert(!multithreaded_mode && "Already multithreaded!"); - multithreaded_mode = true; - - // We fence here to ensure that all initialization is complete BEFORE we - // return from llvm_start_multithreaded(). - sys::MemoryFence(); return true; #else return false; #endif } -void llvm::llvm_stop_multithreaded() { -#if LLVM_ENABLE_THREADS != 0 - assert(multithreaded_mode && "Not currently multithreaded!"); - - // We fence here to insure that all threaded operations are complete BEFORE we - // return from llvm_stop_multithreaded(). 
- sys::MemoryFence(); - - multithreaded_mode = false; -#endif -} - -bool llvm::llvm_is_multithreaded() { - return multithreaded_mode; -} - #if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H) #include diff --git a/unittests/Support/ManagedStatic.cpp b/unittests/Support/ManagedStatic.cpp index ad2fc977e6d7..153884ba4298 100644 --- a/unittests/Support/ManagedStatic.cpp +++ b/unittests/Support/ManagedStatic.cpp @@ -47,7 +47,6 @@ TEST(Initialize, MultipleThreads) { void *p1 = test1::allocate_stack(a1); void *p2 = test1::allocate_stack(a2); - llvm_start_multithreaded(); pthread_t t1, t2; pthread_create(&t1, &a1, test1::helper, nullptr); pthread_create(&t2, &a2, test1::helper, nullptr); @@ -55,7 +54,6 @@ TEST(Initialize, MultipleThreads) { pthread_join(t2, nullptr); free(p1); free(p2); - llvm_stop_multithreaded(); } #endif From ab19aae90fc10bab660ceb3d426f31a3cb66d99e Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 19 Jun 2014 18:25:06 +0000 Subject: [PATCH 0041/1155] Fix build on non-Windows platforms. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211288 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Mutex.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h index 496a4381f3fc..f340051361b0 100644 --- a/include/llvm/Support/Mutex.h +++ b/include/llvm/Support/Mutex.h @@ -15,11 +15,13 @@ #define LLVM_SUPPORT_MUTEX_H #include "llvm/Support/Compiler.h" -#include "llvm/Support/Threading.h" #include namespace llvm { + // Forward declare this function. + bool llvm_is_multithreaded(); + namespace sys { /// @brief Platform agnostic Mutex class. 
From 9022b9a501c2c213deb2d062282658f367758dc7 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 18:26:26 +0000 Subject: [PATCH 0042/1155] Remove circular header reference in Threading.h/Mutex.h git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211290 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Threading.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h index 981971bd1e86..7e8758407c7c 100644 --- a/include/llvm/Support/Threading.h +++ b/include/llvm/Support/Threading.h @@ -15,8 +15,6 @@ #ifndef LLVM_SUPPORT_THREADING_H #define LLVM_SUPPORT_THREADING_H -#include "llvm/Support/Mutex.h" - namespace llvm { /// Returns true if LLVM is compiled with support for multi-threading, and /// false otherwise. From f25c2015b33b5414598b4528763bdc5079385156 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 18:26:28 +0000 Subject: [PATCH 0043/1155] Remove use of removed function, llvm_stop_multithreading git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211291 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/ManagedStatic.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 9d7e99f97fb5..da01195a9df6 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -94,6 +94,4 @@ void llvm::llvm_shutdown() { while (StaticList) StaticList->destroy(); - - if (llvm_is_multithreaded()) llvm_stop_multithreaded(); } From cadf0f68e4266d4ecef23360cd62be12057991c9 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 18:36:15 +0000 Subject: [PATCH 0044/1155] Relax this test a bit, we don't need the full contents of the frame section to match, just the version for this test. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211293 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/ELF/cfi-version.ll | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll index a06638c04e0e..dfe2c36d550f 100644 --- a/test/MC/ELF/cfi-version.ll +++ b/test/MC/ELF/cfi-version.ll @@ -2,9 +2,6 @@ ; RUN: llc %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 ; RUN: llc %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" -target triple = "armv8-arm-none-eabi" - ; Function Attrs: nounwind define i32 @foo() #0 { entry: @@ -37,11 +34,9 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n !12 = metadata !{i32 2, i32 0, metadata !4, null} ; DWARF2: .debug_frame contents: -; DWARF2: 00000000 0000000c ffffffff CIE -; DWARF2-NEXT: Version: 1 +; DWARF2: Version: 1 ; DWARF2-NEXT: Augmentation: ; DWARF34: .debug_frame contents: -; DWARF34: 00000000 0000000c ffffffff CIE -; DWARF34-NEXT: Version: 3 +; DWARF34: Version: 3 ; DWARF34-NEXT: Augmentation: From 01ccb9c9b8544aaf8076672abde4984e6ea56036 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 19:11:22 +0000 Subject: [PATCH 0045/1155] Use the c++ APIs. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211294 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/lto/lto.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index 64abf5cd0582..86e1e810e8d6 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/CommandFlags.h" #include "llvm/LTO/LTOCodeGenerator.h" #include "llvm/LTO/LTOModule.h" +#include "llvm/Support/TargetSelect.h" // extra command-line flags needed for LTOCodeGenerator static cl::opt @@ -46,12 +47,12 @@ static bool parsedOptions = false; // Initialize the configured targets if they have not been initialized. static void lto_initialize() { if (!initialized) { - LLVMInitializeAllTargetInfos(); - LLVMInitializeAllTargets(); - LLVMInitializeAllTargetMCs(); - LLVMInitializeAllAsmParsers(); - LLVMInitializeAllAsmPrinters(); - LLVMInitializeAllDisassemblers(); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); initialized = true; } } From 580a14ff48136183f7904f3a951c37c81409a73e Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 19:25:49 +0000 Subject: [PATCH 0046/1155] Remove unused includes following r211294 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211297 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/lto/lto.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index 86e1e810e8d6..d53d72a52293 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -13,8 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm-c/lto.h" -#include "llvm-c/Core.h" -#include "llvm-c/Target.h" #include "llvm/CodeGen/CommandFlags.h" #include "llvm/LTO/LTOCodeGenerator.h" #include "llvm/LTO/LTOModule.h" From 74fc19ebf43051fa3da353c8f043d0b651f964c0 Mon Sep 17 00:00:00 2001 
From: Eric Christopher Date: Thu, 19 Jun 2014 19:26:42 +0000 Subject: [PATCH 0047/1155] Fix this test a little harder - use llc_dwarf to make sure we don't try to execute it on windows. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211298 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/ELF/cfi-version.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll index dfe2c36d550f..0588fcd45da6 100644 --- a/test/MC/ELF/cfi-version.ll +++ b/test/MC/ELF/cfi-version.ll @@ -1,6 +1,6 @@ -; RUN: llc %s -o - -dwarf-version 2 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF2 -; RUN: llc %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 -; RUN: llc %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 +; RUN: %llc_dwarf %s -o - -dwarf-version 2 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF2 +; RUN: %llc_dwarf %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 +; RUN: %llc_dwarf %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 ; Function Attrs: nounwind define i32 @foo() #0 { From 5b45c5d7ec51ef019a8133c7e27251ce3c9f0bbd Mon Sep 17 00:00:00 2001 From: David Greene Date: Thu, 19 Jun 2014 19:31:05 +0000 Subject: [PATCH 0048/1155] Turn off -Werror by default Don't build with -Werror unless asked to.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211299 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/llvm-compilers-check | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/utils/llvm-compilers-check b/utils/llvm-compilers-check index 3173027759b7..d745b3a7c506 100755 --- a/utils/llvm-compilers-check +++ b/utils/llvm-compilers-check @@ -149,6 +149,8 @@ def add_options(parser): help=("Do not do installs")) parser.add_option("--keep-going", default=False, action="store_true", help=("Keep going after failures")) + parser.add_option("--enable-werror", default=False, action="store_true", + help=("Build with -Werror")) return def check_options(parser, options, valid_builds): @@ -361,16 +363,13 @@ class Builder(threading.Thread): configure_flags = dict( llvm=dict(debug=["--prefix=" + self.install_prefix, - "--enable-werror", "--enable-assertions", "--disable-optimized", "--with-gcc-toolchain=" + cxxroot], release=["--prefix=" + self.install_prefix, - "--enable-werror", "--enable-optimized", "--with-gcc-toolchain=" + cxxroot], paranoid=["--prefix=" + self.install_prefix, - "--enable-werror", "--enable-assertions", "--enable-expensive-checks", "--disable-optimized", @@ -379,6 +378,11 @@ class Builder(threading.Thread): release=[], paranoid=[])) + if (self.options.enable_werror): + configure_flags["llvm"]["debug"].append("--enable-werror") + configure_flags["llvm"]["release"].append("--enable-werror") + configure_flags["llvm"]["paranoid"].append("--enable-werror") + configure_env = dict( llvm=dict(debug=dict(CC=self.cc, CXX=self.cxx), From 30db0c28dee5c8dc8b4bfeb2a94a22ceaba7aec9 Mon Sep 17 00:00:00 2001 From: David Greene Date: Thu, 19 Jun 2014 19:31:09 +0000 Subject: [PATCH 0049/1155] Add option to keep flavor out of the install directory Sometimes we want to install things in "standard" locations and the flavor directories interfere with that. Add an option to keep them out of the install path. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211300 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/llvm-compilers-check | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/utils/llvm-compilers-check b/utils/llvm-compilers-check index d745b3a7c506..c8bd905ff9ce 100755 --- a/utils/llvm-compilers-check +++ b/utils/llvm-compilers-check @@ -149,6 +149,8 @@ def add_options(parser): help=("Do not do installs")) parser.add_option("--keep-going", default=False, action="store_true", help=("Keep going after failures")) + parser.add_option("--no-flavor-prefix", default=False, action="store_true", + help=("Do not append the build flavor to the install path")) parser.add_option("--enable-werror", default=False, action="store_true", help=("Build with -Werror")) return @@ -348,7 +350,9 @@ class Builder(threading.Thread): ssabbrev = get_short_abbrevs([ab for ab in self.source_abbrev.values()]) prefix = "[" + ssabbrev[self.source_abbrev[source]] + "-" + self.build_abbrev[build] + "]" - self.install_prefix += "/" + self.source_abbrev[source] + "/" + build + if (not self.options.no_flavor_prefix): + self.install_prefix += "/" + self.source_abbrev[source] + "/" + build + build_suffix += "/" + self.source_abbrev[source] + "/" + build self.logger = logging.getLogger(prefix) From 581d5d5df8a7811e1dd5c0221dd2e212ef8dd2ac Mon Sep 17 00:00:00 2001 From: David Greene Date: Thu, 19 Jun 2014 19:31:11 +0000 Subject: [PATCH 0050/1155] Remove bogus configure check Configure creates makefiles, so it doesn't make sense to check for them to see if we can configure. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211301 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/llvm-compilers-check | 4 ---- 1 file changed, 4 deletions(-) diff --git a/utils/llvm-compilers-check b/utils/llvm-compilers-check index c8bd905ff9ce..4db8426ace88 100755 --- a/utils/llvm-compilers-check +++ b/utils/llvm-compilers-check @@ -538,10 +538,6 @@ class Builder(threading.Thread): self.logger.info("[" + prefix + "] Configure failed, no configure script " + conf) return -1 - if not os.path.exists(mf): - self.logger.info("[" + prefix + "] Configure failed, no makefile " + mf) - return -1 - if os.path.exists(conf) and os.path.exists(mf): confstat = os.stat(conf) makestat = os.stat(mf) From f21de942f3785d86d1f82cd19072ca26de9167da Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Thu, 19 Jun 2014 19:35:39 +0000 Subject: [PATCH 0051/1155] Support: Add llvm::sys::fs::copy_file A function to copy one file's contents to another. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211302 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/FileSystem.h | 6 ++++++ lib/Support/Path.cpp | 34 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 6abe90446b47..e56e2b718a75 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -335,6 +335,12 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting = true); /// @param to The path to rename to. This is created. std::error_code rename(const Twine &from, const Twine &to); +/// @brief Copy the contents of \a From to \a To. +/// +/// @param From The path to copy from. +/// @param To The path to copy to. This is created. +std::error_code copy_file(const Twine &From, const Twine &To); + /// @brief Resize path to size. File is resized as if by POSIX truncate(). /// /// @param path Input path. 
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index 15edf0ddbbd8..535394c5b3ba 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -846,6 +846,40 @@ std::error_code create_directories(const Twine &Path, bool IgnoreExisting) { return create_directory(P, IgnoreExisting); } +std::error_code copy_file(const Twine &From, const Twine &To) { + int ReadFD, WriteFD; + if (std::error_code EC = openFileForRead(From, ReadFD)) + return EC; + if (std::error_code EC = openFileForWrite(To, WriteFD, F_None)) { + close(ReadFD); + return EC; + } + + const size_t BufSize = 4096; + char *Buf = new char[BufSize]; + int BytesRead = 0, BytesWritten = 0; + for (;;) { + BytesRead = read(ReadFD, Buf, BufSize); + if (BytesRead <= 0) + break; + while (BytesRead) { + BytesWritten = write(WriteFD, Buf, BytesRead); + if (BytesWritten < 0) + break; + BytesRead -= BytesWritten; + } + if (BytesWritten < 0) + break; + } + close(ReadFD); + close(WriteFD); + delete[] Buf; + + if (BytesRead < 0 || BytesWritten < 0) + return std::error_code(errno, std::generic_category()); + return std::error_code(); +} + bool exists(file_status status) { return status_known(status) && status.type() != file_type::file_not_found; } From d06976aba71501f4970bc5bf28d3a9a98c1fb3ad Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 19:41:26 +0000 Subject: [PATCH 0052/1155] Fix typos git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211304 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 2 +- lib/Target/R600/AMDGPUIntrinsics.td | 2 +- lib/Target/X86/X86FastISel.cpp | 2 +- test/CodeGen/PowerPC/vec_cmp.ll | 8 ++++---- test/DebugInfo/2009-11-05-DeadGlobalVariable.ll | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4a762ce6d3bd..cf849951088a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -7147,7 +7147,7 @@ 
ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, // thumb-2 environment, so there is no interworking required. As a result, we // do not expect a veneer to be emitted by the linker, clobbering IP. // - // Each module recieves its own copy of __chkstk, so no import thunk is + // Each module receives its own copy of __chkstk, so no import thunk is // required, again, ensuring that IP is not clobbered. // // Finally, although some linkers may theoretically provide a trampoline for diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 538b4cd8af28..27c0dbea09f4 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -67,7 +67,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; } -// Legacy names for compatability. +// Legacy names for compatibility. let TargetPrefix = "AMDIL", isTarget = 1 in { def int_AMDIL_abs : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_AMDIL_fraction : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9557d96d5309..6bccd1290fec 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1315,7 +1315,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); // Emits an unconditional branch to the FalseBB, obtains the branch - // weight, andd adds it to the successor list. + // weight, and adds it to the successor list. 
FastEmitBranch(FalseMBB, DbgLoc); return true; diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll index 4bce8c80fc6a..2733089fcb10 100644 --- a/test/CodeGen/PowerPC/vec_cmp.ll +++ b/test/CodeGen/PowerPC/vec_cmp.ll @@ -36,7 +36,7 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone { ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v16i8 since it is a altivec native type +; Additional tests for v16i8 since it is a altivec native type define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone { %cmp = icmp eq <16 x i8> %x, %y @@ -165,7 +165,7 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v8i16 since it is an altivec native type +; Additional tests for v8i16 since it is an altivec native type define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone { entry: @@ -298,7 +298,7 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v4si32 since it is an altivec native type +; Additional tests for v4si32 since it is an altivec native type define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone { entry: @@ -449,7 +449,7 @@ entry: ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v4f32 since it is a altivec native type +; Additional tests for v4f32 since it is a altivec native type define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone { entry: diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll index c78b8b8f628f..65907d679780 100644 --- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll +++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll @@ -1,5 +1,5 @@ ; RUN: llc %s -o /dev/null -; Here variable bar is optimzied away. 
Do not trip over while trying to generate debug info. +; Here variable bar is optimized away. Do not trip over while trying to generate debug info. define i32 @foo() nounwind uwtable readnone ssp { From d308c35617935c5826972c3ddaff89f1367d9769 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 19:45:25 +0000 Subject: [PATCH 0053/1155] Remove an incorrect fixme. dynamic-no-pic is just another output type. If gnu ld gets support for MachO, it should also add something like LDPO_DYN_NO_PIC to the plugin interface. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211305 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 371574f26156..a056c61817cf 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -169,8 +169,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { tv->tv_u.tv_val); return LDPS_ERR; } - // TODO: add an option to disable PIC. - //output_type = LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC; break; case LDPT_OPTION: options::process_plugin_option(tv->tv_u.tv_string); From edd372aba2c1422ccb1e1f5ca48fb38eda658914 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 20:00:09 +0000 Subject: [PATCH 0054/1155] Fix up a few formatting issues. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211307 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 995bee121324..3bcff86b1c65 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -610,8 +610,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, // Now emit the table of pairs of PointerSize'ed values for the section // addresses and sizes. 
for (const auto &sec : Sections) { - MCSymbol* StartSymbol = sec.second.first; - MCSymbol* EndSymbol = sec.second.second; + MCSymbol *StartSymbol = sec.second.first; + MCSymbol *EndSymbol = sec.second.second; assert(StartSymbol && "StartSymbol must not be NULL"); assert(EndSymbol && "EndSymbol must not be NULL"); @@ -697,8 +697,8 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, const auto TextSection = Sections.begin(); assert(TextSection != Sections.end() && "No text section found"); - MCSymbol* StartSymbol = TextSection->second.first; - MCSymbol* EndSymbol = TextSection->second.second; + MCSymbol *StartSymbol = TextSection->second.first; + MCSymbol *EndSymbol = TextSection->second.second; assert(StartSymbol && "StartSymbol must not be NULL"); assert(EndSymbol && "EndSymbol must not be NULL"); @@ -809,8 +809,8 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) { for (const auto sec : Sections) { - MCSymbol* StartSymbol = sec.second.first; - MCSymbol* EndSymbol = sec.second.second; + MCSymbol *StartSymbol = sec.second.first; + MCSymbol *EndSymbol = sec.second.second; assert(StartSymbol && "StartSymbol must not be NULL"); assert(EndSymbol && "EndSymbol must not be NULL"); @@ -1539,13 +1539,12 @@ namespace { return CIEKey(nullptr, -1, 0, false, false); } - CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_, - unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) : - Personality(Personality_), PersonalityEncoding(PersonalityEncoding_), - LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_), - IsSimple(IsSimple_) { - } - const MCSymbol* Personality; + CIEKey(const MCSymbol *Personality_, unsigned PersonalityEncoding_, + unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) + : Personality(Personality_), PersonalityEncoding(PersonalityEncoding_), + LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_), + IsSimple(IsSimple_) {} + const MCSymbol *Personality; unsigned PersonalityEncoding; unsigned LsdaEncoding; bool 
IsSignalFrame; @@ -1620,7 +1619,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, Emitter.setSectionStart(SectionStart); MCSymbol *FDEEnd = nullptr; - DenseMap CIEStarts; + DenseMap CIEStarts; const MCSymbol *DummyDebugKey = nullptr; NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame(); From 594f05c8dae6ec0b08694a1d439aee8bca18a47d Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 20:00:13 +0000 Subject: [PATCH 0055/1155] Since we're using DW_AT_string rather than DW_AT_strp for debug_info for assembly files we can't depend on the offset within the section after a string since it could be different between producers etc. Relax these tests accordingly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211308 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/ARM/dwarf-asm-multiple-sections.s | 4 ++-- test/MC/ARM/dwarf-asm-nonstandard-section.s | 6 +++--- test/MC/ARM/dwarf-asm-single-section.s | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/test/MC/ARM/dwarf-asm-multiple-sections.s b/test/MC/ARM/dwarf-asm-multiple-sections.s index 5e64fcc023ff..ed1b89eff3cd 100644 --- a/test/MC/ARM/dwarf-asm-multiple-sections.s +++ b/test/MC/ARM/dwarf-asm-multiple-sections.s @@ -60,8 +60,8 @@ b: // RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev // RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line // RELOC-NEXT: 00000010 R_ARM_ABS32 .debug_ranges -// RELOC-NEXT: 0000004f R_ARM_ABS32 .text -// RELOC-NEXT: 00000061 R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 foo // RELOC: RELOCATION RECORDS FOR [.rel.debug_ranges]: // RELOC-NEXT: 00000004 R_ARM_ABS32 .text diff --git a/test/MC/ARM/dwarf-asm-nonstandard-section.s b/test/MC/ARM/dwarf-asm-nonstandard-section.s index adf92bab18d2..497a39ad1162 100644 --- a/test/MC/ARM/dwarf-asm-nonstandard-section.s +++ b/test/MC/ARM/dwarf-asm-nonstandard-section.s @@ -46,9 +46,9 @@ b: // RELOC: RELOCATION RECORDS FOR 
[.rel.debug_info]: // RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev // RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line -// RELOC-NEXT: 00000010 R_ARM_ABS32 foo -// RELOC-NEXT: 00000014 R_ARM_ABS32 foo -// RELOC-NEXT: 00000053 R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 foo // RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: diff --git a/test/MC/ARM/dwarf-asm-single-section.s b/test/MC/ARM/dwarf-asm-single-section.s index d69dfab5dee5..c57e6498a38a 100644 --- a/test/MC/ARM/dwarf-asm-single-section.s +++ b/test/MC/ARM/dwarf-asm-single-section.s @@ -45,9 +45,9 @@ a: // RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: // RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev // RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line -// RELOC-NEXT: 00000010 R_ARM_ABS32 .text -// RELOC-NEXT: 00000014 R_ARM_ABS32 .text -// RELOC-NEXT: 00000053 R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 .text // RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: From 4110a7ac7ac9f5814b6828e498efdb3e75110f66 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 20:08:56 +0000 Subject: [PATCH 0056/1155] Add StringMap::insert(pair) consistent with the standard associative container concept. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by Agustín Bergé. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211309 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/StringMap.h | 43 ++++++++++++++++++--------------- lib/Support/StringMap.cpp | 10 ++++++-- unittests/ADT/StringMapTest.cpp | 38 +++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 21 deletions(-) diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index 5b1868187986..c40e5e2b3d87 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -64,7 +64,7 @@ class StringMapImpl { } StringMapImpl(unsigned InitSize, unsigned ItemSize); - void RehashTable(); + unsigned RehashTable(unsigned BucketNo = 0); /// LookupBucketFor - Look up the bucket that the specified string should end /// up in. If it already exists as a key in the map, the Item pointer for the @@ -323,6 +323,28 @@ class StringMap : public StringMapImpl { return true; } + /// insert - Inserts the specified key/value pair into the map if the key + /// isn't already in the map. The bool component of the returned pair is true + /// if and only if the insertion takes place, and the iterator component of + /// the pair points to the element with key equivalent to the key of the pair. + std::pair insert(std::pair KV) { + unsigned BucketNo = LookupBucketFor(KV.first); + StringMapEntryBase *&Bucket = TheTable[BucketNo]; + if (Bucket && Bucket != getTombstoneVal()) + return std::make_pair(iterator(TheTable + BucketNo, false), + false); // Already exists in map. + + if (Bucket == getTombstoneVal()) + --NumTombstones; + Bucket = + MapEntryTy::Create(KV.first, Allocator, std::move(KV.second)); + ++NumItems; + assert(NumItems + NumTombstones <= NumBuckets); + + BucketNo = RehashTable(BucketNo); + return std::make_pair(iterator(TheTable + BucketNo, false), true); + } + // clear - Empties out the StringMap void clear() { if (empty()) return; @@ -346,24 +368,7 @@ class StringMap : public StringMapImpl { /// return. 
template MapEntryTy &GetOrCreateValue(StringRef Key, InitTy Val) { - unsigned BucketNo = LookupBucketFor(Key); - StringMapEntryBase *&Bucket = TheTable[BucketNo]; - if (Bucket && Bucket != getTombstoneVal()) - return *static_cast(Bucket); - - MapEntryTy *NewItem = MapEntryTy::Create(Key, Allocator, std::move(Val)); - - if (Bucket == getTombstoneVal()) - --NumTombstones; - ++NumItems; - assert(NumItems + NumTombstones <= NumBuckets); - - // Fill in the bucket for the hash table. The FullHashValue was already - // filled in by LookupBucketFor. - Bucket = NewItem; - - RehashTable(); - return *NewItem; + return *insert(std::make_pair(Key, std::move(Val))).first; } MapEntryTy &GetOrCreateValue(StringRef Key) { diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index 72a6d822d2b6..ddb73494ff5d 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -181,7 +181,7 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { /// RehashTable - Grow the table, redistributing values into the buckets with /// the appropriate mod-of-hashtable-size. -void StringMapImpl::RehashTable() { +unsigned StringMapImpl::RehashTable(unsigned BucketNo) { unsigned NewSize; unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); @@ -193,9 +193,10 @@ void StringMapImpl::RehashTable() { } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) { NewSize = NumBuckets; } else { - return; + return BucketNo; } + unsigned NewBucketNo = BucketNo; // Allocate one extra bucket which will always be non-empty. This allows the // iterators to stop at end. StringMapEntryBase **NewTableArray = @@ -215,6 +216,8 @@ void StringMapImpl::RehashTable() { if (!NewTableArray[NewBucket]) { NewTableArray[FullHash & (NewSize-1)] = Bucket; NewHashArray[FullHash & (NewSize-1)] = FullHash; + if (I == BucketNo) + NewBucketNo = NewBucket; continue; } @@ -227,6 +230,8 @@ void StringMapImpl::RehashTable() { // Finally found a slot. Fill it in. 
NewTableArray[NewBucket] = Bucket; NewHashArray[NewBucket] = FullHash; + if (I == BucketNo) + NewBucketNo = NewBucket; } } @@ -235,4 +240,5 @@ void StringMapImpl::RehashTable() { TheTable = NewTableArray; NumBuckets = NewSize; NumTombstones = 0; + return NewBucketNo; } diff --git a/unittests/ADT/StringMapTest.cpp b/unittests/ADT/StringMapTest.cpp index 70eec873ed23..215d3dfa02e5 100644 --- a/unittests/ADT/StringMapTest.cpp +++ b/unittests/ADT/StringMapTest.cpp @@ -10,6 +10,7 @@ #include "gtest/gtest.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/DataTypes.h" +#include using namespace llvm; namespace { @@ -203,6 +204,43 @@ TEST_F(StringMapTest, InsertTest) { assertSingleItemMap(); } +// Test insert(pair) method +TEST_F(StringMapTest, InsertPairTest) { + bool Inserted; + StringMap::iterator NewIt; + std::tie(NewIt, Inserted) = + testMap.insert(std::make_pair(testKeyFirst, testValue)); + EXPECT_EQ(1u, testMap.size()); + EXPECT_EQ(testValue, testMap[testKeyFirst]); + EXPECT_EQ(testKeyFirst, NewIt->first()); + EXPECT_EQ(testValue, NewIt->second); + EXPECT_TRUE(Inserted); + + StringMap::iterator ExistingIt; + std::tie(ExistingIt, Inserted) = + testMap.insert(std::make_pair(testKeyFirst, testValue + 1)); + EXPECT_EQ(1u, testMap.size()); + EXPECT_EQ(testValue, testMap[testKeyFirst]); + EXPECT_FALSE(Inserted); + EXPECT_EQ(NewIt, ExistingIt); +} + +// Test insert(pair) method when rehashing occurs +TEST_F(StringMapTest, InsertRehashingPairTest) { + // Check that the correct iterator is returned when the inserted element is + // moved to a different bucket during internal rehashing. This depends on + // the particular key, and the implementation of StringMap and HashString. + // Changes to those might result in this test not actually checking that. 
+ StringMap t(1); + EXPECT_EQ(1u, t.getNumBuckets()); + + StringMap::iterator It = + t.insert(std::make_pair("abcdef", 42)).first; + EXPECT_EQ(2u, t.getNumBuckets()); + EXPECT_EQ("abcdef", It->first()); + EXPECT_EQ(42u, It->second); +} + // Create a non-default constructable value struct StringMapTestStruct { StringMapTestStruct(int i) : i(i) {} From e0d89ff76414a1369c54fbd261308e1c313548d7 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 19 Jun 2014 20:20:03 +0000 Subject: [PATCH 0057/1155] Include Threading.h instead of forward declaring a function. Previously this led to a circular header dependency, but a recent change has since removed this dependency, so the correct fix is to simply include the header rather than forward declare. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211311 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Mutex.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h index f340051361b0..496a4381f3fc 100644 --- a/include/llvm/Support/Mutex.h +++ b/include/llvm/Support/Mutex.h @@ -15,13 +15,11 @@ #define LLVM_SUPPORT_MUTEX_H #include "llvm/Support/Compiler.h" +#include "llvm/Support/Threading.h" #include namespace llvm { - // Forward declare this function. - bool llvm_is_multithreaded(); - namespace sys { /// @brief Platform agnostic Mutex class. From d2f9358649478df80d688e77858dee14e37aebed Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 21:03:04 +0000 Subject: [PATCH 0058/1155] Add a new subtarget hook for whether or not we'd like to enable the atomic load linked expander pass to run for a particular subtarget. This requires a check of the subtarget and so save the TargetMachine rather than only TargetLoweringInfo and update all callers. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211314 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetSubtargetInfo.h | 3 ++ lib/CodeGen/AtomicExpandLoadLinkedPass.cpp | 33 +++++++++++++--------- lib/Target/ARM/ARMSubtarget.cpp | 4 +++ lib/Target/ARM/ARMSubtarget.h | 3 ++ lib/Target/ARM/ARMTargetMachine.cpp | 13 ++++----- lib/Target/TargetSubtargetInfo.cpp | 4 +++ 6 files changed, 39 insertions(+), 21 deletions(-) diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index bb164288b013..83f1997bf205 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -73,6 +73,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// MISchedPostRA, is set. virtual bool enablePostMachineScheduler() const; + /// \brief True if the subtarget should run the atomic expansion pass. + virtual bool enableAtomicExpandLoadLinked() const; + /// \brief Override generic scheduling policy within a region. /// /// This is a convenient way for targets that don't provide any custom diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp index 4c4150bfec90..c7cc4bcb1cb6 100644 --- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp +++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp @@ -21,17 +21,19 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "arm-atomic-expand" namespace { class AtomicExpandLoadLinked : public FunctionPass { - const TargetLowering *TLI; + const TargetMachine *TM; public: static char ID; // Pass identification, replacement for typeid explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TLI(TM ? 
TM->getTargetLowering() : nullptr) { + : FunctionPass(ID), TM(TM) { initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); } @@ -59,7 +61,7 @@ FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { } bool AtomicExpandLoadLinked::runOnFunction(Function &F) { - if (!TLI) + if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) return false; SmallVector AtomicInsts; @@ -76,7 +78,7 @@ bool AtomicExpandLoadLinked::runOnFunction(Function &F) { bool MadeChange = false; for (Instruction *Inst : AtomicInsts) { - if (!TLI->shouldExpandAtomicInIR(Inst)) + if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst)) continue; if (AtomicRMWInst *AI = dyn_cast(Inst)) @@ -98,13 +100,14 @@ bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { // Load instructions don't actually need a leading fence, even in the // SequentiallyConsistent case. AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering(); + TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic + : LI->getOrdering(); // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is // an ldrexd (A3.5.3). IRBuilder<> Builder(LI); - Value *Val = - TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder); + Value *Val = TM->getTargetLowering()->emitLoadLinked( + Builder, LI->getPointerOperand(), MemOpOrder); insertTrailingFence(Builder, LI->getOrdering()); @@ -165,7 +168,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { // Start the main loop block now that we've taken care of the preliminaries. 
Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); Value *NewVal; switch (AI->getOperation()) { @@ -211,8 +215,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { llvm_unreachable("Unknown atomic op"); } - Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( + Builder, NewVal, Addr, MemOpOrder); Value *TryAgain = Builder.CreateICmpNE( StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); @@ -278,7 +282,8 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); Value *ShouldStore = Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); @@ -287,7 +292,7 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TLI->emitStoreConditional( + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( Builder, CI->getNewValOperand(), Addr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); @@ -352,7 +357,7 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return Ord; if (Ord == Release || Ord == AcquireRelease || Ord == 
SequentiallyConsistent) @@ -365,7 +370,7 @@ AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return; if (Ord == Acquire || Ord == AcquireRelease) diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 875f1e83f875..e7d699c75614 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -417,6 +417,10 @@ bool ARMSubtarget::enablePostMachineScheduler() const { return PostRAScheduler; } +bool ARMSubtarget::enableAtomicExpandLoadLinked() const { + return hasAnyDataBarrier() && !isThumb1Only(); +} + bool ARMSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index ae62a6f876fd..c13ef865389e 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -425,6 +425,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo { TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const override; + // enableAtomicExpandLoadLinked - True if we need to expand our atomics. + bool enableAtomicExpandLoadLinked() const override; + /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. 
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7a3836ae62b4..434d3b03ae48 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -171,16 +171,15 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { } void ARMPassConfig::addIRPasses() { - const ARMSubtarget *Subtarget = &getARMSubtarget(); - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { - addPass(createAtomicExpandLoadLinkedPass(TM)); + addPass(createAtomicExpandLoadLinkedPass(TM)); - // Cmpxchg instructions are often used with a subsequent comparison to - // determine whether it succeeded. We can exploit existing control-flow in - // ldrex/strex loops to simplify this, but it needs tidying up. + // Cmpxchg instructions are often used with a subsequent comparison to + // determine whether it succeeded. We can exploit existing control-flow in + // ldrex/strex loops to simplify this, but it needs tidying up. 
+ const ARMSubtarget *Subtarget = &getARMSubtarget(); + if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass()); - } TargetPassConfig::addIRPasses(); } diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index 0c388f8fb26c..ce00fcc62737 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -39,6 +39,10 @@ bool TargetSubtargetInfo::useMachineScheduler() const { return enableMachineScheduler(); } +bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const { + return true; +} + bool TargetSubtargetInfo::enableMachineScheduler() const { return false; } From 0b5745abd8e46ac60cd16f949a2fd5a581fc8fd4 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 21:14:13 +0000 Subject: [PATCH 0059/1155] Use lib/LTO directly in the gold plugin. The tools/lto API is not the best choice for implementing a gold plugin. Among other issues: * It is a stable ABI. Old errors stay and we have to be really careful before adding new features. * It has to support two fairly different linkers: gold and ld64. * We end up with a plugin that depends on a shared lib, something quite unusual in LLVM land. * It hides LLVM. For some features in the gold plugin it would be really nice to be able to just get a Module or a GlobalValue. This change is intended to be a very direct translation from the C API. It will just enable other fixes and cleanups. Tested with an LTO bootstrap on linux. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211315 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/CMakeLists.txt | 7 +-- tools/gold/gold-plugin.cpp | 93 +++++++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 35 deletions(-) diff --git a/tools/gold/CMakeLists.txt b/tools/gold/CMakeLists.txt index 07a1e2849b95..3864e154548c 100644 --- a/tools/gold/CMakeLists.txt +++ b/tools/gold/CMakeLists.txt @@ -14,13 +14,14 @@ else() # ABI compatibility. add_definitions( -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 ) - set(LLVM_LINK_COMPONENTS support) + set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + LTO + ) add_llvm_loadable_module(LLVMgold gold-plugin.cpp ) - target_link_libraries(LLVMgold ${cmake_2_8_12_PRIVATE} LTO) - endif() diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index a056c61817cf..f4b5caeeff19 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -15,11 +15,15 @@ #include "llvm/Config/config.h" // plugin-api.h requires HAVE_STDINT_H #include "llvm-c/lto.h" #include "llvm/ADT/StringSet.h" +#include "llvm/CodeGen/CommandFlags.h" +#include "llvm/LTO/LTOCodeGenerator.h" +#include "llvm/LTO/LTOModule.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include #include @@ -72,9 +76,10 @@ namespace { std::string output_name = ""; std::list Modules; std::vector Cleanup; - lto_code_gen_t code_gen = NULL; + LTOCodeGenerator *CodeGen = nullptr; StringSet<> CannotBeHidden; } +static llvm::TargetOptions TargetOpts; namespace options { enum generate_bc { BC_NO, BC_ALSO, BC_ONLY }; @@ -142,6 +147,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { // for services. 
bool registeredClaimFile = false; + bool RegisteredAllSymbolsRead = false; for (; tv->tv_tag != LDPT_NULL; ++tv) { switch (tv->tv_tag) { @@ -189,7 +195,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { if ((*callback)(all_symbols_read_hook) != LDPS_OK) return LDPS_ERR; - code_gen = lto_codegen_create(); + RegisteredAllSymbolsRead = true; } break; case LDPT_REGISTER_CLEANUP_HOOK: { ld_plugin_register_cleanup callback; @@ -233,6 +239,28 @@ ld_plugin_status onload(ld_plugin_tv *tv) { return LDPS_ERR; } + if (RegisteredAllSymbolsRead) { + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); + TargetOpts = InitTargetOptionsFromCodeGenFlags(); + CodeGen = new LTOCodeGenerator(); + CodeGen->setTargetOptions(TargetOpts); + if (MAttrs.size()) { + std::string Attrs; + for (unsigned I = 0; I < MAttrs.size(); ++I) { + if (I > 0) + Attrs.append(","); + Attrs.append(MAttrs[I]); + } + + CodeGen->setAttr(Attrs.c_str()); + } + } + return LDPS_OK; } @@ -241,7 +269,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { /// with add_symbol if possible. 
static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, int *claimed) { - lto_module_t M; + LTOModule *M; const void *view; std::unique_ptr buffer; if (get_view) { @@ -264,17 +292,15 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, view = buffer->getBufferStart(); } - if (!lto_module_is_object_file_in_memory(view, file->filesize)) + if (!LTOModule::isBitcodeFile(view, file->filesize)) return LDPS_OK; - M = lto_module_create_from_memory(view, file->filesize); + std::string Error; + M = LTOModule::makeLTOModule(view, file->filesize, TargetOpts, Error); if (!M) { - if (const char* msg = lto_get_error_message()) { - (*message)(LDPL_ERROR, - "LLVM gold plugin has failed to create LTO module: %s", - msg); - return LDPS_ERR; - } + (*message)(LDPL_ERROR, + "LLVM gold plugin has failed to create LTO module: %s", + Error.c_str()); return LDPS_OK; } @@ -283,20 +309,20 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, claimed_file &cf = Modules.back(); if (!options::triple.empty()) - lto_module_set_target_triple(M, options::triple.c_str()); + M->setTargetTriple(options::triple.c_str()); cf.handle = file->handle; - unsigned sym_count = lto_module_get_num_symbols(M); + unsigned sym_count = M->getSymbolCount(); cf.syms.reserve(sym_count); for (unsigned i = 0; i != sym_count; ++i) { - lto_symbol_attributes attrs = lto_module_get_symbol_attribute(M, i); + lto_symbol_attributes attrs = M->getSymbolAttributes(i); if ((attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL) continue; cf.syms.push_back(ld_plugin_symbol()); ld_plugin_symbol &sym = cf.syms.back(); - sym.name = const_cast(lto_module_get_symbol_name(M, i)); + sym.name = const_cast(M->getSymbolName(i)); sym.name = strdup(sym.name); sym.version = NULL; @@ -359,15 +385,15 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, } } - if (code_gen) { - if (lto_codegen_add_module(code_gen, M)) { - (*message)(LDPL_ERROR, "Error 
linking module: %s", - lto_get_error_message()); + if (CodeGen) { + std::string Error; + if (!CodeGen->addModule(M, Error)) { + (*message)(LDPL_ERROR, "Error linking module: %s", Error.c_str()); return LDPS_ERR; } } - lto_module_dispose(M); + delete M; return LDPS_OK; } @@ -386,7 +412,7 @@ static bool mustPreserve(const claimed_file &F, int i) { /// codegen. static ld_plugin_status all_symbols_read_hook(void) { std::ofstream api_file; - assert(code_gen); + assert(CodeGen); if (options::generate_api_file) { api_file.open("apifile.txt", std::ofstream::out | std::ofstream::trunc); @@ -403,7 +429,7 @@ static ld_plugin_status all_symbols_read_hook(void) { (*get_symbols)(I->handle, I->syms.size(), &I->syms[0]); for (unsigned i = 0, e = I->syms.size(); i != e; i++) { if (mustPreserve(*I, i)) { - lto_codegen_add_must_preserve_symbol(code_gen, I->syms[i].name); + CodeGen->addMustPreserveSymbol(I->syms[i].name); if (options::generate_api_file) api_file << I->syms[i].name << "\n"; @@ -414,16 +440,16 @@ static ld_plugin_status all_symbols_read_hook(void) { if (options::generate_api_file) api_file.close(); - lto_codegen_set_pic_model(code_gen, output_type); - lto_codegen_set_debug_model(code_gen, LTO_DEBUG_MODEL_DWARF); + CodeGen->setCodePICModel(output_type); + CodeGen->setDebugInfo(LTO_DEBUG_MODEL_DWARF); if (!options::mcpu.empty()) - lto_codegen_set_cpu(code_gen, options::mcpu.c_str()); + CodeGen->setCpu(options::mcpu.c_str()); // Pass through extra options to the code generator. 
if (!options::extra.empty()) { for (std::vector::iterator it = options::extra.begin(); it != options::extra.end(); ++it) { - lto_codegen_debug_options(code_gen, (*it).c_str()); + CodeGen->setCodeGenDebugOptions((*it).c_str()); } } @@ -435,11 +461,12 @@ static ld_plugin_status all_symbols_read_hook(void) { path = options::bc_path; else path = output_name + ".bc"; - bool err = lto_codegen_write_merged_modules(code_gen, path.c_str()); - if (err) + CodeGen->parseCodeGenDebugOptions(); + std::string Error; + if (!CodeGen->writeMergedModules(path.c_str(), Error)) (*message)(LDPL_FATAL, "Failed to write the output file."); if (options::generate_bc_file == options::BC_ONLY) { - lto_codegen_dispose(code_gen); + delete CodeGen; exit(0); } } @@ -447,13 +474,15 @@ static ld_plugin_status all_symbols_read_hook(void) { std::string ObjPath; { const char *Temp; - if (lto_codegen_compile_to_file(code_gen, &Temp)) { + CodeGen->parseCodeGenDebugOptions(); + std::string Error; + if (!CodeGen->compile_to_file(&Temp, /*DisableOpt*/ false, /*DisableInline*/ + false, /*DisableGVNLoadPRE*/ false, Error)) (*message)(LDPL_ERROR, "Could not produce a combined object file\n"); - } ObjPath = Temp; } - lto_codegen_dispose(code_gen); + delete CodeGen; for (std::list::iterator I = Modules.begin(), E = Modules.end(); I != E; ++I) { for (unsigned i = 0; i != I->syms.size(); ++i) { From c28016e4131bdd50c23510adb3a0254b7fb5adc6 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Thu, 19 Jun 2014 22:03:18 +0000 Subject: [PATCH 0060/1155] =?UTF-8?q?Change=20the=20output=20of=20llvm-nm?= =?UTF-8?q?=20and=20llvm-size=20for=20Mach-O=20universal=20files=20(aka=20?= =?UTF-8?q?fat=20files)=20to=20print=20=E2=80=9C=20(for=20architecture=20X?= =?UTF-8?q?YZ)=E2=80=9D=20for=20fat=20files=20with=20more=20than=20one=20a?= =?UTF-8?q?rchitecture=20to=20be=20like=20what=20the=20darwin=20tools=20do?= =?UTF-8?q?=20for=20fat=20files.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit Also clean up the Mach-O printing of archive membernames in llvm-nm to use the darwin form of "libx.a(foo.o)". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211316 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Object/MachOUniversal.h | 4 ++++ lib/Object/MachOUniversal.cpp | 8 ++------ test/Object/nm-universal-binary.test | 8 ++++---- test/Object/size-trivial-macho.test | 8 ++++---- tools/llvm-nm/llvm-nm.cpp | 22 ++++++++++++++++++---- tools/llvm-size/llvm-size.cpp | 19 +++++++++++++++---- 6 files changed, 47 insertions(+), 22 deletions(-) diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h index 47e93c26b46a..74448f973b2e 100644 --- a/include/llvm/Object/MachOUniversal.h +++ b/include/llvm/Object/MachOUniversal.h @@ -18,6 +18,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/MachO.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MachO.h" @@ -52,6 +53,9 @@ class MachOUniversalBinary : public Binary { ObjectForArch getNext() const { return ObjectForArch(Parent, Index + 1); } uint32_t getCPUType() const { return Header.cputype; } + std::string getArchTypeName() const { + return Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + } std::error_code getAsObjectFile(std::unique_ptr &Result) const; diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index 05729ef7467e..887e2bd0a34b 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -72,9 +72,7 @@ std::error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile( if (Parent) { StringRef ParentData = Parent->getData(); StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - std::string ObjectName = - Parent->getFileName().str() + ":" + - Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + std::string ObjectName = Parent->getFileName().str(); MemoryBuffer *ObjBuffer = 
MemoryBuffer::getMemBuffer( ObjectData, ObjectName, false); ErrorOr Obj = ObjectFile::createMachOObjectFile(ObjBuffer); @@ -91,9 +89,7 @@ std::error_code MachOUniversalBinary::ObjectForArch::getAsArchive( if (Parent) { StringRef ParentData = Parent->getData(); StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - std::string ObjectName = - Parent->getFileName().str() + ":" + - Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + std::string ObjectName = Parent->getFileName().str(); MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer( ObjectData, ObjectName, false); ErrorOr Obj = Archive::create(ObjBuffer); diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test index c20c733dcd8b..52781267c5cb 100644 --- a/test/Object/nm-universal-binary.test +++ b/test/Object/nm-universal-binary.test @@ -3,17 +3,17 @@ RUN: | FileCheck %s -check-prefix CHECK-OBJ RUN: llvm-nm %p/Inputs/macho-universal-archive.x86_64.i386 \ RUN: | FileCheck %s -check-prefix CHECK-AR -CHECK-OBJ: macho-universal.x86_64.i386:x86_64 +CHECK-OBJ: macho-universal.x86_64.i386 (for architecture x86_64): CHECK-OBJ: 0000000100000f60 T _main -CHECK-OBJ: macho-universal.x86_64.i386:i386 +CHECK-OBJ: macho-universal.x86_64.i386 (for architecture i386): CHECK-OBJ: 00001fa0 T _main -CHECK-AR: macho-universal-archive.x86_64.i386:x86_64:hello.o: +CHECK-AR: macho-universal-archive.x86_64.i386(hello.o) (for architecture x86_64): CHECK-AR: 0000000000000068 s EH_frame0 CHECK-AR: 000000000000003b s L_.str CHECK-AR: 0000000000000000 T _main CHECK-AR: 0000000000000080 S _main.eh CHECK-AR: U _printf -CHECK-AR: macho-universal-archive.x86_64.i386:i386:foo.o: +CHECK-AR: macho-universal-archive.x86_64.i386(foo.o) (for architecture i386): CHECK-AR: 00000008 S _bar CHECK-AR: 00000000 T _foo diff --git a/test/Object/size-trivial-macho.test b/test/Object/size-trivial-macho.test index 1ed611b0de22..6602d5651860 100644 --- a/test/Object/size-trivial-macho.test +++ 
b/test/Object/size-trivial-macho.test @@ -71,9 +71,9 @@ mxl: Segment __LINKEDIT: 0x1000 (vmaddr 0x100002000 fileoff 8192) mxl: total 0x100003000 u: __TEXT __DATA __OBJC others dec hex -u: 4096 0 0 4294971392 4294975488 100002000 {{.*}}/macho-universal.x86_64.i386:x86_64 -u: 4096 0 0 8192 12288 3000 {{.*}}/macho-universal.x86_64.i386:i386 +u: 4096 0 0 4294971392 4294975488 100002000 {{.*}}/macho-universal.x86_64.i386 (for architecture x86_64) +u: 4096 0 0 8192 12288 3000 {{.*}}/macho-universal.x86_64.i386 (for architecture i386) uAR: __TEXT __DATA __OBJC others dec hex -uAR: 136 0 0 32 168 a8 {{.*}}/macho-universal-archive.x86_64.i386:x86_64(hello.o) -uAR: 5 4 0 0 9 9 {{.*}}/macho-universal-archive.x86_64.i386:i386(foo.o) +uAR: 136 0 0 32 168 a8 {{.*}}/macho-universal-archive.x86_64.i386(hello.o) (for architecture x86_64) +uAR: 5 4 0 0 9 9 {{.*}}/macho-universal-archive.x86_64.i386(foo.o) (for architecture i386) diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index e605e6e430a7..c6b80d1dd965 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -752,20 +752,28 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { if (ChildOrErr.getError()) continue; if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { - outs() << O->getFileName() << ":\n"; + if (isa(O)) { + outs() << Filename << "(" << O->getFileName() << ")"; + } else + outs() << O->getFileName(); + outs() << ":\n"; dumpSymbolNamesFromObject(O); } } return; } if (MachOUniversalBinary *UB = dyn_cast(Bin.get())) { + bool moreThanOneArch = UB->getNumberOfObjects() > 1; for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), E = UB->end_objects(); I != E; ++I) { std::unique_ptr Obj; std::unique_ptr A; if (!I->getAsObjectFile(Obj)) { - outs() << Obj->getFileName() << ":\n"; + outs() << Obj->getFileName(); + if (isa(Obj.get()) && moreThanOneArch) + outs() << " (for architecture " << I->getArchTypeName() << ")"; + outs() << ":\n"; 
dumpSymbolNamesFromObject(Obj.get()); } else if (!I->getAsArchive(A)) { @@ -776,8 +784,14 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { if (ChildOrErr.getError()) continue; if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { - outs() << A->getFileName() << ":"; - outs() << O->getFileName() << ":\n"; + outs() << A->getFileName(); + if (isa(O)) { + outs() << "(" << O->getFileName() << ")"; + if (moreThanOneArch) + outs() << " (for architecture " << I->getArchTypeName() << ")"; + } else + outs() << ":" << O->getFileName(); + outs() << ":\n"; dumpSymbolNamesFromObject(O); } } diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp index b3aaac8d27dd..408bb4a18804 100644 --- a/tools/llvm-size/llvm-size.cpp +++ b/tools/llvm-size/llvm-size.cpp @@ -469,13 +469,20 @@ static void PrintFileSectionSizes(StringRef file) { std::unique_ptr UA; if (!I->getAsObjectFile(UO)) { if (ObjectFile *o = dyn_cast(&*UO.get())) { + MachOObjectFile *MachO = dyn_cast(o); if (OutputFormat == sysv) outs() << o->getFileName() << " :\n"; + else if(MachO && OutputFormat == darwin) { + if (moreThanOneFile || moreThanOneArch) + outs() << o->getFileName() << " (for architecture " + << I->getArchTypeName() << "):"; + outs() << "\n"; + } PrintObjectSectionSizes(o); if (OutputFormat == berkeley) { - MachOObjectFile *MachO = dyn_cast(o); if (!MachO || moreThanOneFile || moreThanOneArch) - outs() << o->getFileName(); + outs() << o->getFileName() << " (for architecture " + << I->getArchTypeName() << ")"; outs() << "\n"; } } @@ -495,11 +502,15 @@ static void PrintFileSectionSizes(StringRef file) { outs() << o->getFileName() << " (ex " << UA->getFileName() << "):\n"; else if(MachO && OutputFormat == darwin) - outs() << UA->getFileName() << "(" << o->getFileName() << "):\n"; + outs() << UA->getFileName() << "(" << o->getFileName() << ")" + << " (for architecture " << I->getArchTypeName() + << "):\n"; PrintObjectSectionSizes(o); if (OutputFormat == berkeley) { if (MachO) - 
outs() << UA->getFileName() << "(" << o->getFileName() << ")\n"; + outs() << UA->getFileName() << "(" << o->getFileName() << ")" + << " (for architecture " << I->getArchTypeName() + << ")\n"; else outs() << o->getFileName() << " (ex " << UA->getFileName() << ")\n"; From 9eb38b233e5353a80c7d99676a08fe87d87debdb Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:14:12 +0000 Subject: [PATCH 0061/1155] Set missing options in LTOCodeGenerator::setTargetOptions. Patch by Tom Roeder, I just added the test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211317 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/LTO/LTOCodeGenerator.cpp | 5 +++++ test/LTO/jump-table-type.ll | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 test/LTO/jump-table-type.ll diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 9009958613ed..2772676c8e8c 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -141,6 +141,11 @@ void LTOCodeGenerator::setTargetOptions(TargetOptions options) { Options.TrapFuncName = options.TrapFuncName; Options.PositionIndependentExecutable = options.PositionIndependentExecutable; Options.UseInitArray = options.UseInitArray; + Options.DataSections = options.DataSections; + Options.FunctionSections = options.FunctionSections; + + Options.MCOptions = options.MCOptions; + Options.JTType = options.JTType; } void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { diff --git a/test/LTO/jump-table-type.ll b/test/LTO/jump-table-type.ll new file mode 100644 index 000000000000..a39d3e959830 --- /dev/null +++ b/test/LTO/jump-table-type.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as <%s >%t1 +; RUN: llvm-lto -o %t2 %t1 -jump-table-type=arity +; RUN: llvm-nm %t2 | FileCheck %s + +; CHECK: T __llvm_jump_instr_table_0_1 +; CHECK: T __llvm_jump_instr_table_1_1 + +target triple = "x86_64-unknown-linux-gnu" + +define i32 @g(i32 %a) unnamed_addr jumptable { + ret i32 %a +} + +define i32 
@f() unnamed_addr jumptable { + ret i32 0 +} + +define i32 @main() { + ret i32 0 +} + +@llvm.used = appending global [2 x i8*] [i8* bitcast (i32(i32)* @g to i8*), + i8* bitcast (i32()* @f to i8*)] From 474f2238604c3e1c81e2094acf53726e43c96fd3 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:20:07 +0000 Subject: [PATCH 0062/1155] Reduce indentation. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211318 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index f4b5caeeff19..e21a82373f7d 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -239,26 +239,26 @@ ld_plugin_status onload(ld_plugin_tv *tv) { return LDPS_ERR; } - if (RegisteredAllSymbolsRead) { - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - InitializeAllAsmPrinters(); - InitializeAllDisassemblers(); - TargetOpts = InitTargetOptionsFromCodeGenFlags(); - CodeGen = new LTOCodeGenerator(); - CodeGen->setTargetOptions(TargetOpts); - if (MAttrs.size()) { - std::string Attrs; - for (unsigned I = 0; I < MAttrs.size(); ++I) { - if (I > 0) - Attrs.append(","); - Attrs.append(MAttrs[I]); - } + if (!RegisteredAllSymbolsRead) + return LDPS_OK; - CodeGen->setAttr(Attrs.c_str()); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); + TargetOpts = InitTargetOptionsFromCodeGenFlags(); + CodeGen = new LTOCodeGenerator(); + CodeGen->setTargetOptions(TargetOpts); + if (MAttrs.size()) { + std::string Attrs; + for (unsigned I = 0; I < MAttrs.size(); ++I) { + if (I > 0) + Attrs.append(","); + Attrs.append(MAttrs[I]); } + CodeGen->setAttr(Attrs.c_str()); } return LDPS_OK; From 
5c2871c78139ed8bee12552f8c8c3ff9afe6f60e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:27:46 +0000 Subject: [PATCH 0063/1155] Use the assignment operator. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211319 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/LTO/LTOCodeGenerator.cpp | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 2772676c8e8c..a1709f60fff2 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -124,28 +124,7 @@ bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { } void LTOCodeGenerator::setTargetOptions(TargetOptions options) { - Options.LessPreciseFPMADOption = options.LessPreciseFPMADOption; - Options.NoFramePointerElim = options.NoFramePointerElim; - Options.AllowFPOpFusion = options.AllowFPOpFusion; - Options.UnsafeFPMath = options.UnsafeFPMath; - Options.NoInfsFPMath = options.NoInfsFPMath; - Options.NoNaNsFPMath = options.NoNaNsFPMath; - Options.HonorSignDependentRoundingFPMathOption = - options.HonorSignDependentRoundingFPMathOption; - Options.UseSoftFloat = options.UseSoftFloat; - Options.FloatABIType = options.FloatABIType; - Options.NoZerosInBSS = options.NoZerosInBSS; - Options.GuaranteedTailCallOpt = options.GuaranteedTailCallOpt; - Options.DisableTailCalls = options.DisableTailCalls; - Options.StackAlignmentOverride = options.StackAlignmentOverride; - Options.TrapFuncName = options.TrapFuncName; - Options.PositionIndependentExecutable = options.PositionIndependentExecutable; - Options.UseInitArray = options.UseInitArray; - Options.DataSections = options.DataSections; - Options.FunctionSections = options.FunctionSections; - - Options.MCOptions = options.MCOptions; - Options.JTType = options.JTType; + Options = options; } void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { From 7c9dcfb29e5095d560206973c40e6a33ba108c21 Mon 
Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:33:23 +0000 Subject: [PATCH 0064/1155] Simplify. No functionality change. Thanks to Alp Toker for noticing it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211320 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index e21a82373f7d..2cde5d1c579c 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -322,8 +322,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, cf.syms.push_back(ld_plugin_symbol()); ld_plugin_symbol &sym = cf.syms.back(); - sym.name = const_cast(M->getSymbolName(i)); - sym.name = strdup(sym.name); + sym.name = strdup(M->getSymbolName(i)); sym.version = NULL; int scope = attrs & LTO_SYMBOL_SCOPE_MASK; From f7b992bdcd05fd6844f96e0431505d0b9c9956f9 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Thu, 19 Jun 2014 22:49:21 +0000 Subject: [PATCH 0065/1155] =?UTF-8?q?Fix=20the=20output=20of=20llvm-nm=20f?= =?UTF-8?q?or=20Mach-O=20files=20to=20use=20the=20characters=20=E2=80=98d?= =?UTF-8?q?=E2=80=99=20and=20=E2=80=98b=E2=80=99=20for=20data=20and=20bss?= =?UTF-8?q?=20symbols=20instead=20of=20the=20generic=20=E2=80=99s=E2=80=99?= =?UTF-8?q?=20for=20a=20symbol=20in=20a=20section.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211321 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Object/nm-trivial-object.test | 7 +++++++ test/Object/nm-universal-binary.test | 2 +- tools/llvm-nm/llvm-nm.cpp | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test index 20ac6621e728..c53dc91ecd4c 100644 --- a/test/Object/nm-trivial-object.test +++ b/test/Object/nm-trivial-object.test @@ -14,6 +14,8 @@ RUN: llvm-nm 
%p/Inputs/trivial-object-test.macho-i386 \ RUN: | FileCheck %s -check-prefix macho RUN: llvm-nm %p/Inputs/trivial-object-test.macho-x86-64 \ RUN: | FileCheck %s -check-prefix macho64 +RUN: llvm-nm %p/Inputs/macho-text-data-bss.macho-x86_64 \ +RUN: | FileCheck %s -check-prefix macho-tdb RUN: llvm-nm %p/Inputs/common.coff-i386 \ RUN: | FileCheck %s -check-prefix COFF-COMMON RUN: llvm-nm %p/Inputs/relocatable-with-section-address.elf-x86-64 \ @@ -64,6 +66,11 @@ macho64: U _SomeOtherFunction macho64: 0000000000000000 T _main macho64: U _puts +macho-tdb: 0000000000000030 s EH_frame0 +macho-tdb: 0000000000000070 b _b +macho-tdb: 000000000000000c D _d +macho-tdb: 0000000000000000 T _t +macho-tdb: 0000000000000048 S _t.eh Test that nm uses addresses even with ELF .o files. ELF-SEC-ADDR64: 0000000000000058 D a diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test index 52781267c5cb..8992359a79b6 100644 --- a/test/Object/nm-universal-binary.test +++ b/test/Object/nm-universal-binary.test @@ -15,5 +15,5 @@ CHECK-AR: 0000000000000000 T _main CHECK-AR: 0000000000000080 S _main.eh CHECK-AR: U _printf CHECK-AR: macho-universal-archive.x86_64.i386(foo.o) (for architecture i386): -CHECK-AR: 00000008 S _bar +CHECK-AR: 00000008 D _bar CHECK-AR: 00000000 T _foo diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index c6b80d1dd965..5062435d8971 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -577,6 +577,10 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) { StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref); if (SegmentName == "__TEXT" && SectionName == "__text") return 't'; + else if (SegmentName == "__DATA" && SectionName == "__data") + return 'd'; + else if (SegmentName == "__DATA" && SectionName == "__bss") + return 'b'; else return 's'; } From f38fdf1b612bf9e0a19d8d08d4283baa05334bcb Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:54:47 
+0000 Subject: [PATCH 0066/1155] Set gold plugin options in a sane order. This fixes the processing of --plugin-opt=-jump-table-type=arity. Nice properties: * We call InitTargetOptionsFromCodeGenFlags once. * We call parseCodeGenDebugOptions once. * It works :-) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211322 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 2cde5d1c579c..bb8b32b814f9 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -248,9 +248,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { InitializeAllAsmParsers(); InitializeAllAsmPrinters(); InitializeAllDisassemblers(); - TargetOpts = InitTargetOptionsFromCodeGenFlags(); CodeGen = new LTOCodeGenerator(); - CodeGen->setTargetOptions(TargetOpts); if (MAttrs.size()) { std::string Attrs; for (unsigned I = 0; I < MAttrs.size(); ++I) { @@ -261,6 +259,18 @@ ld_plugin_status onload(ld_plugin_tv *tv) { CodeGen->setAttr(Attrs.c_str()); } + // Pass through extra options to the code generator. + if (!options::extra.empty()) { + for (std::vector::iterator it = options::extra.begin(); + it != options::extra.end(); ++it) { + CodeGen->setCodeGenDebugOptions((*it).c_str()); + } + } + + CodeGen->parseCodeGenDebugOptions(); + TargetOpts = InitTargetOptionsFromCodeGenFlags(); + CodeGen->setTargetOptions(TargetOpts); + return LDPS_OK; } @@ -444,14 +454,6 @@ static ld_plugin_status all_symbols_read_hook(void) { if (!options::mcpu.empty()) CodeGen->setCpu(options::mcpu.c_str()); - // Pass through extra options to the code generator. 
- if (!options::extra.empty()) { - for (std::vector::iterator it = options::extra.begin(); - it != options::extra.end(); ++it) { - CodeGen->setCodeGenDebugOptions((*it).c_str()); - } - } - if (options::generate_bc_file != options::BC_NO) { std::string path; if (options::generate_bc_file == options::BC_ONLY) @@ -460,7 +462,6 @@ static ld_plugin_status all_symbols_read_hook(void) { path = options::bc_path; else path = output_name + ".bc"; - CodeGen->parseCodeGenDebugOptions(); std::string Error; if (!CodeGen->writeMergedModules(path.c_str(), Error)) (*message)(LDPL_FATAL, "Failed to write the output file."); @@ -473,7 +474,6 @@ static ld_plugin_status all_symbols_read_hook(void) { std::string ObjPath; { const char *Temp; - CodeGen->parseCodeGenDebugOptions(); std::string Error; if (!CodeGen->compile_to_file(&Temp, /*DisableOpt*/ false, /*DisableInline*/ false, /*DisableGVNLoadPRE*/ false, Error)) From 2ae686a2ab90def4f9fa38c1407bbe7f062068f3 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 23:06:53 +0000 Subject: [PATCH 0067/1155] The gold plugin doesn't need disassemblers. Back in r128440 tools/LTO started exporting the disassembler interface. It was never clear why, but whatever the reason I am pretty sure it doesn't hold for tools/gold. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211325 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index bb8b32b814f9..4e273de3404a 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -247,7 +247,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { InitializeAllTargetMCs(); InitializeAllAsmParsers(); InitializeAllAsmPrinters(); - InitializeAllDisassemblers(); CodeGen = new LTOCodeGenerator(); if (MAttrs.size()) { std::string Attrs; From d1e64617ee1b76c28fa512d622f0b977144b300f Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Fri, 20 Jun 2014 00:04:16 +0000 Subject: [PATCH 0068/1155] Added the -m option as an alias for -format=darwin to llvm-nm and llvm-size which is what the darwin tools use for the Mach-O format output. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211326 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Object/nm-darwin-m.test | 2 +- test/Object/size-trivial-macho.test | 2 +- tools/llvm-nm/llvm-nm.cpp | 3 +++ tools/llvm-size/llvm-size.cpp | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/test/Object/nm-darwin-m.test b/test/Object/nm-darwin-m.test index 6c718128aa61..5bb19dcacd37 100644 --- a/test/Object/nm-darwin-m.test +++ b/test/Object/nm-darwin-m.test @@ -2,7 +2,7 @@ RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test1.mach0-armv7 \ RUN: | FileCheck %s -check-prefix test1 RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test2.macho-i386 \ RUN: | FileCheck %s -check-prefix test2 -RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test3.macho-x86-64 \ +RUN: llvm-nm -m %p/Inputs/darwin-m-test3.macho-x86-64 \ RUN: | FileCheck %s -check-prefix test3 # This is testing that the various bits in the n_desc feild are correct diff --git a/test/Object/size-trivial-macho.test b/test/Object/size-trivial-macho.test index 6602d5651860..1642790c2c7b 100644 --- 
a/test/Object/size-trivial-macho.test +++ b/test/Object/size-trivial-macho.test @@ -8,7 +8,7 @@ RUN: llvm-size %p/Inputs/macho-archive-x86_64.a \ RUN: | FileCheck %s -check-prefix AR RUN: llvm-size -format darwin %p/Inputs/macho-archive-x86_64.a \ RUN: | FileCheck %s -check-prefix mAR -RUN: llvm-size -format darwin -x -l %p/Inputs/hello-world.macho-x86_64 \ +RUN: llvm-size -m -x -l %p/Inputs/hello-world.macho-x86_64 \ RUN: | FileCheck %s -check-prefix mxl RUN: llvm-size %p/Inputs/macho-universal.x86_64.i386 \ RUN: | FileCheck %s -check-prefix u diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index 5062435d8971..cccddb0736d1 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -81,6 +81,7 @@ cl::alias ExternalOnly2("g", cl::desc("Alias for --extern-only"), cl::opt BSDFormat("B", cl::desc("Alias for --format=bsd")); cl::opt POSIXFormat("P", cl::desc("Alias for --format=posix")); +cl::opt DarwinFormat("m", cl::desc("Alias for --format=darwin")); cl::opt PrintFileName( "print-file-name", @@ -828,6 +829,8 @@ int main(int argc, char **argv) { OutputFormat = bsd; if (POSIXFormat) OutputFormat = posix; + if (DarwinFormat) + OutputFormat = darwin; // The relative order of these is important. If you pass --size-sort it should // only print out the size. 
However, if you pass -S --size-sort, it should diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp index 408bb4a18804..b71380dcaa4a 100644 --- a/tools/llvm-size/llvm-size.cpp +++ b/tools/llvm-size/llvm-size.cpp @@ -46,6 +46,7 @@ static cl::opt OutputFormatShort(cl::desc("Specify output format"), cl::values(clEnumValN(sysv, "A", "System V format"), clEnumValN(berkeley, "B", "Berkeley format"), + clEnumValN(darwin, "m", "Darwin -m format"), clEnumValEnd), cl::init(berkeley)); From 6fda71e05edc6447f780dee7fff8bd4bf543f39e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 00:23:03 +0000 Subject: [PATCH 0069/1155] Revert "Add StringMap::insert(pair) consistent with the standard associative container concept." This reverts commit r211309. It looks like it broke some bots: http://lab.llvm.org:8011/builders/clang-x86_64-ubuntu-gdb-75/builds/15563/steps/compile/logs/stdio git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211328 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/StringMap.h | 43 +++++++++++++++------------------ lib/Support/StringMap.cpp | 10 ++------ unittests/ADT/StringMapTest.cpp | 38 ----------------------------- 3 files changed, 21 insertions(+), 70 deletions(-) diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index c40e5e2b3d87..5b1868187986 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -64,7 +64,7 @@ class StringMapImpl { } StringMapImpl(unsigned InitSize, unsigned ItemSize); - unsigned RehashTable(unsigned BucketNo = 0); + void RehashTable(); /// LookupBucketFor - Look up the bucket that the specified string should end /// up in. If it already exists as a key in the map, the Item pointer for the @@ -323,28 +323,6 @@ class StringMap : public StringMapImpl { return true; } - /// insert - Inserts the specified key/value pair into the map if the key - /// isn't already in the map. 
The bool component of the returned pair is true - /// if and only if the insertion takes place, and the iterator component of - /// the pair points to the element with key equivalent to the key of the pair. - std::pair insert(std::pair KV) { - unsigned BucketNo = LookupBucketFor(KV.first); - StringMapEntryBase *&Bucket = TheTable[BucketNo]; - if (Bucket && Bucket != getTombstoneVal()) - return std::make_pair(iterator(TheTable + BucketNo, false), - false); // Already exists in map. - - if (Bucket == getTombstoneVal()) - --NumTombstones; - Bucket = - MapEntryTy::Create(KV.first, Allocator, std::move(KV.second)); - ++NumItems; - assert(NumItems + NumTombstones <= NumBuckets); - - BucketNo = RehashTable(BucketNo); - return std::make_pair(iterator(TheTable + BucketNo, false), true); - } - // clear - Empties out the StringMap void clear() { if (empty()) return; @@ -368,7 +346,24 @@ class StringMap : public StringMapImpl { /// return. template MapEntryTy &GetOrCreateValue(StringRef Key, InitTy Val) { - return *insert(std::make_pair(Key, std::move(Val))).first; + unsigned BucketNo = LookupBucketFor(Key); + StringMapEntryBase *&Bucket = TheTable[BucketNo]; + if (Bucket && Bucket != getTombstoneVal()) + return *static_cast(Bucket); + + MapEntryTy *NewItem = MapEntryTy::Create(Key, Allocator, std::move(Val)); + + if (Bucket == getTombstoneVal()) + --NumTombstones; + ++NumItems; + assert(NumItems + NumTombstones <= NumBuckets); + + // Fill in the bucket for the hash table. The FullHashValue was already + // filled in by LookupBucketFor. 
+ Bucket = NewItem; + + RehashTable(); + return *NewItem; } MapEntryTy &GetOrCreateValue(StringRef Key) { diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index ddb73494ff5d..72a6d822d2b6 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -181,7 +181,7 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { /// RehashTable - Grow the table, redistributing values into the buckets with /// the appropriate mod-of-hashtable-size. -unsigned StringMapImpl::RehashTable(unsigned BucketNo) { +void StringMapImpl::RehashTable() { unsigned NewSize; unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); @@ -193,10 +193,9 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) { } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) { NewSize = NumBuckets; } else { - return BucketNo; + return; } - unsigned NewBucketNo = BucketNo; // Allocate one extra bucket which will always be non-empty. This allows the // iterators to stop at end. StringMapEntryBase **NewTableArray = @@ -216,8 +215,6 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) { if (!NewTableArray[NewBucket]) { NewTableArray[FullHash & (NewSize-1)] = Bucket; NewHashArray[FullHash & (NewSize-1)] = FullHash; - if (I == BucketNo) - NewBucketNo = NewBucket; continue; } @@ -230,8 +227,6 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) { // Finally found a slot. Fill it in. 
NewTableArray[NewBucket] = Bucket; NewHashArray[NewBucket] = FullHash; - if (I == BucketNo) - NewBucketNo = NewBucket; } } @@ -240,5 +235,4 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) { TheTable = NewTableArray; NumBuckets = NewSize; NumTombstones = 0; - return NewBucketNo; } diff --git a/unittests/ADT/StringMapTest.cpp b/unittests/ADT/StringMapTest.cpp index 215d3dfa02e5..70eec873ed23 100644 --- a/unittests/ADT/StringMapTest.cpp +++ b/unittests/ADT/StringMapTest.cpp @@ -10,7 +10,6 @@ #include "gtest/gtest.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/DataTypes.h" -#include using namespace llvm; namespace { @@ -204,43 +203,6 @@ TEST_F(StringMapTest, InsertTest) { assertSingleItemMap(); } -// Test insert(pair) method -TEST_F(StringMapTest, InsertPairTest) { - bool Inserted; - StringMap::iterator NewIt; - std::tie(NewIt, Inserted) = - testMap.insert(std::make_pair(testKeyFirst, testValue)); - EXPECT_EQ(1u, testMap.size()); - EXPECT_EQ(testValue, testMap[testKeyFirst]); - EXPECT_EQ(testKeyFirst, NewIt->first()); - EXPECT_EQ(testValue, NewIt->second); - EXPECT_TRUE(Inserted); - - StringMap::iterator ExistingIt; - std::tie(ExistingIt, Inserted) = - testMap.insert(std::make_pair(testKeyFirst, testValue + 1)); - EXPECT_EQ(1u, testMap.size()); - EXPECT_EQ(testValue, testMap[testKeyFirst]); - EXPECT_FALSE(Inserted); - EXPECT_EQ(NewIt, ExistingIt); -} - -// Test insert(pair) method when rehashing occurs -TEST_F(StringMapTest, InsertRehashingPairTest) { - // Check that the correct iterator is returned when the inserted element is - // moved to a different bucket during internal rehashing. This depends on - // the particular key, and the implementation of StringMap and HashString. - // Changes to those might result in this test not actually checking that. 
- StringMap t(1); - EXPECT_EQ(1u, t.getNumBuckets()); - - StringMap::iterator It = - t.insert(std::make_pair("abcdef", 42)).first; - EXPECT_EQ(2u, t.getNumBuckets()); - EXPECT_EQ("abcdef", It->first()); - EXPECT_EQ(42u, It->second); -} - // Create a non-default constructable value struct StringMapTestStruct { StringMapTestStruct(int i) : i(i) {} From 160dcf5b61b8d328cd1705a90c1e0ae27dcabd41 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 20 Jun 2014 00:38:12 +0000 Subject: [PATCH 0070/1155] Don't build switch lookup tables for dllimport or TLS variables We would previously put dllimport variables in switch lookup tables, which doesn't work because the address cannot be used in a constant initializer. This is basically the same problem that we have in PR19955. Putting TLS variables in switch tables also doesn't work, because the address of such a variable is not constant. Differential Revision: http://reviews.llvm.org/D4220 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211331 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Constant.h | 3 ++ lib/IR/Constants.cpp | 53 ++++++++++++------- lib/Transforms/Utils/SimplifyCFG.cpp | 4 ++ .../SimplifyCFG/X86/switch_to_lookup_table.ll | 52 ++++++++++++++++++ 4 files changed, 92 insertions(+), 20 deletions(-) diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h index 39c7c37dafd5..257bb80f2119 100644 --- a/include/llvm/IR/Constant.h +++ b/include/llvm/IR/Constant.h @@ -71,6 +71,9 @@ class Constant : public User { /// isThreadDependent - Return true if the value can vary between threads. bool isThreadDependent() const; + /// Return true if the value is dependent on a dllimport variable. + bool isDLLImportDependent() const; + /// isConstantUsed - Return true if the constant has users other than constant /// exprs and other dangling things.
bool isConstantUsed() const; diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index aa26cff6a7b6..5851625383b9 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -278,35 +278,48 @@ bool Constant::canTrap() const { return canTrapImpl(this, NonTrappingOps); } -/// isThreadDependent - Return true if the value can vary between threads. -bool Constant::isThreadDependent() const { - SmallPtrSet Visited; - SmallVector WorkList; - WorkList.push_back(this); - Visited.insert(this); +/// Check if C contains a GlobalValue for which Predicate is true. +static bool +ConstHasGlobalValuePredicate(const Constant *C, + bool (*Predicate)(const GlobalValue *)) { + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(C); + Visited.insert(C); while (!WorkList.empty()) { - const Constant *C = WorkList.pop_back_val(); - - if (const GlobalVariable *GV = dyn_cast(C)) { - if (GV->isThreadLocal()) + const Constant *WorkItem = WorkList.pop_back_val(); + if (const auto *GV = dyn_cast(WorkItem)) + if (Predicate(GV)) return true; - } - - for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) { - const Constant *D = dyn_cast(C->getOperand(I)); - if (!D) + for (const Value *Op : WorkItem->operands()) { + const Constant *ConstOp = dyn_cast(Op); + if (!ConstOp) continue; - if (Visited.insert(D)) - WorkList.push_back(D); + if (Visited.insert(ConstOp)) + WorkList.push_back(ConstOp); } } - return false; } -/// isConstantUsed - Return true if the constant has users other than constant -/// exprs and other dangling things. +/// Return true if the value can vary between threads. 
+bool Constant::isThreadDependent() const { + auto DLLImportPredicate = [](const GlobalValue *GV) { + return GV->isThreadLocal(); + }; + return ConstHasGlobalValuePredicate(this, DLLImportPredicate); +} + +bool Constant::isDLLImportDependent() const { + auto DLLImportPredicate = [](const GlobalValue *GV) { + return GV->hasDLLImportStorageClass(); + }; + return ConstHasGlobalValuePredicate(this, DLLImportPredicate); +} + +/// Return true if the constant has users other than constant exprs and other +/// dangling things. bool Constant::isConstantUsed() const { for (const User *U : users()) { const Constant *UC = dyn_cast(U); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index e155daf6fcce..ff2f2a036225 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3313,6 +3313,10 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { static bool ValidLookupTableConstant(Constant *C) { if (ConstantExpr *CE = dyn_cast(C)) return CE->isGEPWithNoNotionalOverIndexing(); + if (C->isThreadDependent()) + return false; + if (C->isDLLImportDependent()) + return false; return isa(C) || isa(C) || diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index 81079b1aa5ab..ee63d2c0c0f6 100644 --- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -918,3 +918,55 @@ return: ; CHECK: switch i32 ; CHECK-NOT: @switch.table } + +; Don't build tables for switches with TLS variables. 
+@tls_a = thread_local global i32 0 +@tls_b = thread_local global i32 0 +@tls_c = thread_local global i32 0 +@tls_d = thread_local global i32 0 +define i32* @tls(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %return + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + ] +sw.bb1: + br label %return +sw.bb2: + br label %return +sw.default: + br label %return +return: + %retval.0 = phi i32* [ @tls_d, %sw.default ], [ @tls_c, %sw.bb2 ], [ @tls_b, %sw.bb1 ], [ @tls_a, %entry ] + ret i32* %retval.0 +; CHECK-LABEL: @tls( +; CHECK: switch i32 +; CHECK-NOT: @switch.table +} + +; Don't build tables for switches with dllimport variables. +@dllimport_a = external dllimport global i32 +@dllimport_b = external dllimport global i32 +@dllimport_c = external dllimport global i32 +@dllimport_d = external dllimport global i32 +define i32* @dllimport(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %return + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + ] +sw.bb1: + br label %return +sw.bb2: + br label %return +sw.default: + br label %return +return: + %retval.0 = phi i32* [ @dllimport_d, %sw.default ], [ @dllimport_c, %sw.bb2 ], [ @dllimport_b, %sw.bb1 ], [ @dllimport_a, %entry ] + ret i32* %retval.0 +; CHECK-LABEL: @dllimport( +; CHECK: switch i32 +; CHECK-NOT: @switch.table +} From c577e71bf580d344970406915e050a301ae8a28d Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 20 Jun 2014 01:05:28 +0000 Subject: [PATCH 0071/1155] [x86] Make the x86 PACKSSWB, PACKSSDW, PACKUSWB, and PACKUSDW instructions available as synthetic SDNodes PACKSS and PACKUS that will select to the correct instruction variants based on the return type. This allows us to use these rather important instructions when lowering vector shuffles. Also moves the relevant instruction definitions to be split out from the fully generic multiclasses to allow them to match these new SDNodes in the same way that the UNPCK instructions do. 
No functionality should actually be changed here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211332 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 +++ lib/Target/X86/X86ISelLowering.h | 2 + lib/Target/X86/X86InstrFragmentsSIMD.td | 4 + lib/Target/X86/X86InstrSSE.td | 151 ++++++++++++++++++++---- 4 files changed, 152 insertions(+), 21 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a7b6e707816f..09018a5e1187 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12513,6 +12513,20 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::x86_sse2_packssdw_128: + case Intrinsic::x86_sse2_packsswb_128: + case Intrinsic::x86_avx2_packssdw: + case Intrinsic::x86_avx2_packsswb: + return DAG.getNode(X86ISD::PACKSS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::x86_sse2_packuswb_128: + case Intrinsic::x86_sse41_packusdw: + case Intrinsic::x86_avx2_packuswb: + case Intrinsic::x86_avx2_packusdw: + return DAG.getNode(X86ISD::PACKUS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), @@ -15286,6 +15300,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TESTM: return "X86ISD::TESTM"; case X86ISD::TESTNM: return "X86ISD::TESTNM"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; + case X86ISD::PACKSS: return "X86ISD::PACKSS"; + case X86ISD::PACKUS: return "X86ISD::PACKUS"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index af2e4344e5b6..df9ab3aa0681 100644 --- 
a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -315,6 +315,8 @@ namespace llvm { KORTEST, // Several flavors of instructions with vector shuffle behaviors. + PACKSS, + PACKUS, PALIGNR, PSHUFD, PSHUFHW, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 1582f4388192..6f0fa9462770 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -224,6 +224,10 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; +def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; +def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>; +def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>; + def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 043b2f32c6ff..11c3f11f2cdc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4336,20 +4336,6 @@ defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, SSE_INTALU_ITINS_P, 0>; -//===---------------------------------------------------------------------===// -// SSE2 - Packed Integer Pack Instructions -//===---------------------------------------------------------------------===// - -defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, - int_x86_avx2_packsswb, - SSE_INTALU_ITINS_SHUFF_P, 0>; -defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, - int_x86_avx2_packssdw, - SSE_INTALU_ITINS_SHUFF_P, 0>; -defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, - int_x86_avx2_packuswb, - SSE_INTALU_ITINS_SHUFF_P, 0>; - 
//===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions //===---------------------------------------------------------------------===// @@ -4431,6 +4417,136 @@ let Predicates = [UseSSE2] in { (PSHUFDri VR128:$src1, imm:$imm)>; } +//===---------------------------------------------------------------------===// +// Packed Integer Pack Instructions (SSE & AVX) +//===---------------------------------------------------------------------===// + +let ExeDomain = SSEPackedInt in { +multiclass sse2_pack opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, + bit Is2Addr = 1> { + def rr : PDI, + Sched<[WriteShuffle]>; + def rm : PDI, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse2_pack_y opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> { + def Yrr : PDI, + Sched<[WriteShuffle]>; + def Yrm : PDI, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse4_pack opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, + bit Is2Addr = 1> { + def rr : SS48I, + Sched<[WriteShuffle]>; + def rm : SS48I, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse4_pack_y opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> { + def Yrr : SS48I, + Sched<[WriteShuffle]>; + def Yrm : SS48I, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +let Predicates = [HasAVX] in { + defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, + bc_v8i16, 0>, VEX_4V; + defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, + bc_v4i32, 0>, VEX_4V; + + defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, + bc_v8i16, 0>, VEX_4V; + defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, + bc_v4i32, 0>, VEX_4V; +} + +let Predicates = [HasAVX2] in { + defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, 
X86Packss, + bc_v16i16>, VEX_4V, VEX_L; + defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, + bc_v8i32>, VEX_4V, VEX_L; + + defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus, + bc_v16i16>, VEX_4V, VEX_L; + defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, + bc_v8i32>, VEX_4V, VEX_L; +} + +let Constraints = "$src1 = $dst" in { + defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, + bc_v8i16>; + defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, + bc_v4i32>; + + defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, + bc_v8i16>; + + let Predicates = [HasSSE41] in + defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, + bc_v4i32>; +} +} // ExeDomain = SSEPackedInt + //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Unpack Instructions //===---------------------------------------------------------------------===// @@ -7053,8 +7169,6 @@ multiclass SS48I_binop_rm2 opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX] in { let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, - 0, DEFAULT_ITINS_SHUFFLESCHED>, VEX_4V; defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; @@ -7086,9 +7200,6 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", - int_x86_avx2_packusdw, WriteShuffle>, - VEX_4V, VEX_L; defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; @@ -7120,8 +7231,6 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in { let isCommutable = 0 in - defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw, - 1, DEFAULT_ITINS_SHUFFLESCHED>; defm PMINSB : 
SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, From 16e592a6fe237900a7fa347c732f29395013fc34 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 20 Jun 2014 01:30:43 +0000 Subject: [PATCH 0072/1155] Support: Write ScaledNumbers::getRounded() Start extracting helper functions out of -block-freq's `UnsignedFloat` into `Support/ScaledNumber.h` with the eventual goal of moving and renaming the class to `ScaledNumber`. The bike shed about names is still being painted, but I'm going with this for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211333 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 10 ++-- include/llvm/Support/ScaledNumber.h | 57 ++++++++++++++++++ unittests/Support/CMakeLists.txt | 1 + unittests/Support/ScaledNumberTest.cpp | 60 +++++++++++++++++++ 4 files changed, 123 insertions(+), 5 deletions(-) create mode 100644 include/llvm/Support/ScaledNumber.h create mode 100644 unittests/Support/ScaledNumberTest.cpp diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index bd72d3ed6d4d..7ad76e5350e2 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -22,6 +22,7 @@ #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ScaledNumber.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -343,12 +344,11 @@ template class UnsignedFloat : UnsignedFloatBase { } static UnsignedFloat getRounded(UnsignedFloat P, bool Round) { - if (!Round) + // Saturate. + if (P.isLargest()) return P; - if (P.Digits == DigitsLimits::max()) - // Careful of overflow in the exponent. 
- return UnsignedFloat(1, P.Exponent) <<= Width; - return UnsignedFloat(P.Digits + 1, P.Exponent); + + return ScaledNumbers::getRounded(P.Digits, P.Exponent, Round); } }; diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h new file mode 100644 index 000000000000..afdc7dc1888a --- /dev/null +++ b/include/llvm/Support/ScaledNumber.h @@ -0,0 +1,57 @@ +//===- llvm/Support/ScaledNumber.h - Support for scaled numbers -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions (and a class) useful for working with scaled +// numbers -- in particular, pairs of integers where one represents digits and +// another represents a scale. The functions are helpers and live in the +// namespace ScaledNumbers. The class ScaledNumber is useful for modelling +// certain cost metrics that need simple, integer-like semantics that are easy +// to reason about. +// +// These might remind you of soft-floats. If you want one of those, you're in +// the wrong place. Look at include/llvm/ADT/APFloat.h instead. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_SCALEDNUMBER_H +#define LLVM_SUPPORT_SCALEDNUMBER_H + +#include +#include +#include + +namespace llvm { +namespace ScaledNumbers { + +/// \brief Get the width of a number. +template inline int getWidth() { return sizeof(DigitsT) * 8; } + +/// \brief Conditionally round up a scaled number. +/// +/// Given \c Digits and \c Scale, round up iff \c ShouldRound is \c true. +/// Always returns \c Scale unless there's an overflow, in which case it +/// returns \c 1+Scale. +/// +/// \pre adding 1 to \c Scale will not overflow INT16_MAX. 
+template +inline std::pair getRounded(DigitsT Digits, int16_t Scale, + bool ShouldRound) { + static_assert(!std::numeric_limits::is_signed, "expected unsigned"); + + if (ShouldRound) + if (!++Digits) + // Overflow. + return std::make_pair(DigitsT(1) << (getWidth() - 1), Scale + 1); + return std::make_pair(Digits, Scale); +} +} +} + +#endif + diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt index c50acdfb8e64..08096a4a179a 100644 --- a/unittests/Support/CMakeLists.txt +++ b/unittests/Support/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_unittest(SupportTests ProcessTest.cpp ProgramTest.cpp RegexTest.cpp + ScaledNumberTest.cpp SourceMgrTest.cpp StringPool.cpp SwapByteOrderTest.cpp diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp new file mode 100644 index 000000000000..1fa54ff81d75 --- /dev/null +++ b/unittests/Support/ScaledNumberTest.cpp @@ -0,0 +1,60 @@ +//===- llvm/unittest/Support/ScaledNumberTest.cpp - ScaledPair tests -----==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ScaledNumber.h" + +#include "llvm/Support/DataTypes.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::ScaledNumbers; + +namespace { + +template struct ScaledPair { + UIntT D; + int S; + ScaledPair(const std::pair &F) : D(F.first), S(F.second) {} + ScaledPair(UIntT D, int S) : D(D), S(S) {} + + bool operator==(const ScaledPair &X) const { + return D == X.D && S == X.S; + } +}; +template +bool operator==(const std::pair &L, + const ScaledPair &R) { + return ScaledPair(L) == R; +} +template +void PrintTo(const ScaledPair &F, ::std::ostream *os) { + *os << F.D << "*2^" << F.S; +} + +typedef ScaledPair SP32; +typedef ScaledPair SP64; + +TEST(ScaledNumberHelpersTest, getRounded) { + EXPECT_EQ(getRounded(0, 0, false), SP32(0, 0)); + EXPECT_EQ(getRounded(0, 0, true), SP32(1, 0)); + EXPECT_EQ(getRounded(20, 21, true), SP32(21, 21)); + EXPECT_EQ(getRounded(UINT32_MAX, 0, false), SP32(UINT32_MAX, 0)); + EXPECT_EQ(getRounded(UINT32_MAX, 0, true), SP32(1 << 31, 1)); + + EXPECT_EQ(getRounded(0, 0, false), SP64(0, 0)); + EXPECT_EQ(getRounded(0, 0, true), SP64(1, 0)); + EXPECT_EQ(getRounded(20, 21, true), SP64(21, 21)); + EXPECT_EQ(getRounded(UINT32_MAX, 0, false), SP64(UINT32_MAX, 0)); + EXPECT_EQ(getRounded(UINT32_MAX, 0, true), + SP64(UINT64_C(1) << 32, 0)); + EXPECT_EQ(getRounded(UINT64_MAX, 0, false), SP64(UINT64_MAX, 0)); + EXPECT_EQ(getRounded(UINT64_MAX, 0, true), + SP64(UINT64_C(1) << 63, 1)); +} +} From caa2bd600c65e9c04d24fe0199d6a61e215d4682 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 20 Jun 2014 01:36:00 +0000 Subject: [PATCH 0073/1155] Fix .cpp files claiming to be header files git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211334 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Atomic.cpp | 2 +- lib/Support/DynamicLibrary.cpp | 2 +- lib/Support/Host.cpp | 2 +- lib/Support/Process.cpp | 2 +- 
lib/Support/Program.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp index 2ef32b08ef28..ac4ff3eb5c66 100644 --- a/lib/Support/Atomic.cpp +++ b/lib/Support/Atomic.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements atomic operations. +// This file implements atomic operations. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 82d7c0cc6d19..d2b551e8a0a6 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system DynamicLibrary concept. +// This file implements the operating system DynamicLibrary concept. // // FIXME: This file leaks ExplicitSymbols and OpenedHandles! // diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index ce0a3b6bed77..03187e97eef7 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Host concept. +// This file implements the operating system Host concept. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index 0380ed955dd5..087c459e187e 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Process concept. +// This file implements the operating system Process concept. 
// //===----------------------------------------------------------------------===// diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp index eb700e3a8591..b84b82b1f10b 100644 --- a/lib/Support/Program.cpp +++ b/lib/Support/Program.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Program concept. +// This file implements the operating system Program concept. // //===----------------------------------------------------------------------===// From 8483dacfd804c71adc73764cee7f1304d1c8977c Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 01:37:35 +0000 Subject: [PATCH 0074/1155] Small clanups: Use static instead of anonymous namespace. Delete write only variables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211335 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 58 +++++++++++++++----------------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 4e273de3404a..3c2da94af532 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -50,35 +50,30 @@ using namespace llvm; namespace { - ld_plugin_status discard_message(int level, const char *format, ...) { - // Die loudly. Recent versions of Gold pass ld_plugin_message as the first - // callback in the transfer vector. This should never be called. 
- abort(); - } +struct claimed_file { + void *handle; + std::vector syms; +}; +} - ld_plugin_add_symbols add_symbols = NULL; - ld_plugin_get_symbols get_symbols = NULL; - ld_plugin_add_input_file add_input_file = NULL; - ld_plugin_add_input_library add_input_library = NULL; - ld_plugin_set_extra_library_path set_extra_library_path = NULL; - ld_plugin_get_view get_view = NULL; - ld_plugin_message message = discard_message; - - int api_version = 0; - int gold_version = 0; - - struct claimed_file { - void *handle; - std::vector syms; - }; - - lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC; - std::string output_name = ""; - std::list Modules; - std::vector Cleanup; - LTOCodeGenerator *CodeGen = nullptr; - StringSet<> CannotBeHidden; +static ld_plugin_status discard_message(int level, const char *format, ...) { + // Die loudly. Recent versions of Gold pass ld_plugin_message as the first + // callback in the transfer vector. This should never be called. + abort(); } + +static ld_plugin_add_symbols add_symbols = NULL; +static ld_plugin_get_symbols get_symbols = NULL; +static ld_plugin_add_input_file add_input_file = NULL; +static ld_plugin_set_extra_library_path set_extra_library_path = NULL; +static ld_plugin_get_view get_view = NULL; +static ld_plugin_message message = discard_message; +static lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC; +static std::string output_name = ""; +static std::list Modules; +static std::vector Cleanup; +static LTOCodeGenerator *CodeGen = nullptr; +static StringSet<> CannotBeHidden; static llvm::TargetOptions TargetOpts; namespace options { @@ -151,12 +146,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { for (; tv->tv_tag != LDPT_NULL; ++tv) { switch (tv->tv_tag) { - case LDPT_API_VERSION: - api_version = tv->tv_u.tv_val; - break; - case LDPT_GOLD_VERSION: // major * 100 + minor - gold_version = tv->tv_u.tv_val; - break; case LDPT_OUTPUT_NAME: output_name = tv->tv_u.tv_string; break; @@ -213,9 +202,6 @@ 
ld_plugin_status onload(ld_plugin_tv *tv) { case LDPT_ADD_INPUT_FILE: add_input_file = tv->tv_u.tv_add_input_file; break; - case LDPT_ADD_INPUT_LIBRARY: - add_input_library = tv->tv_u.tv_add_input_file; - break; case LDPT_SET_EXTRA_LIBRARY_PATH: set_extra_library_path = tv->tv_u.tv_set_extra_library_path; break; From 5cf39383da201784a40eb47cfd289cbe98972145 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 20 Jun 2014 02:31:03 +0000 Subject: [PATCH 0075/1155] Support: Write ScaledNumbers::getAdjusted() git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211336 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 24 ++++++------- include/llvm/Support/ScaledNumber.h | 35 +++++++++++++++++++ unittests/Support/ScaledNumberTest.cpp | 23 ++++++++++++ 3 files changed, 68 insertions(+), 14 deletions(-) diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 7ad76e5350e2..264aff372581 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -327,20 +327,16 @@ template class UnsignedFloat : UnsignedFloatBase { return countLeadingZeros32(Digits) + Width - 32; } - static UnsignedFloat adjustToWidth(uint64_t N, int32_t S) { - assert(S >= MinExponent); - assert(S <= MaxExponent); - if (Width == 64 || N <= DigitsLimits::max()) - return UnsignedFloat(N, S); - - // Shift right. - int Shift = 64 - Width - countLeadingZeros64(N); - DigitsType Shifted = N >> Shift; - - // Round. - assert(S + Shift <= MaxExponent); - return getRounded(UnsignedFloat(Shifted, S + Shift), - N & UINT64_C(1) << (Shift - 1)); + /// \brief Adjust a number to width, rounding up if necessary. + /// + /// Should only be called for \c Shift close to zero. + /// + /// \pre Shift >= MinExponent && Shift + 64 <= MaxExponent. 
+ static UnsignedFloat adjustToWidth(uint64_t N, int32_t Shift) { + assert(Shift >= MinExponent && "Shift should be close to 0"); + assert(Shift <= MaxExponent - 64 && "Shift should be close to 0"); + auto Adjusted = ScaledNumbers::getAdjusted(N, Shift); + return Adjusted; } static UnsignedFloat getRounded(UnsignedFloat P, bool Round) { diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index afdc7dc1888a..54b5386c5ac2 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -22,6 +22,8 @@ #ifndef LLVM_SUPPORT_SCALEDNUMBER_H #define LLVM_SUPPORT_SCALEDNUMBER_H +#include "llvm/Support/MathExtras.h" + #include #include #include @@ -50,6 +52,39 @@ inline std::pair getRounded(DigitsT Digits, int16_t Scale, return std::make_pair(DigitsT(1) << (getWidth() - 1), Scale + 1); return std::make_pair(Digits, Scale); } + +/// \brief Adjust a 64-bit scaled number down to the appropriate width. +/// +/// Adjust a soft float with 64-bits of digits down, keeping as much +/// information as possible, and rounding up on half. +/// +/// \pre Adding 1 to \c Scale will not overflow INT16_MAX. +template +inline std::pair getAdjusted(uint64_t Digits, + int16_t Scale = 0) { + static_assert(!std::numeric_limits::is_signed, "expected unsigned"); + + const int Width = getWidth(); + if (Width == 64 || Digits <= std::numeric_limits::max()) + return std::make_pair(Digits, Scale); + + // Shift right and round. + int Shift = 64 - Width - countLeadingZeros(Digits); + return getRounded(Digits >> Shift, Scale + Shift, + Digits & (UINT64_C(1) << (Shift - 1))); +} + +/// \brief Convenience helper for adjusting to 32 bits. +inline std::pair getAdjusted32(uint64_t Digits, + int16_t Scale = 0) { + return getAdjusted(Digits, Scale); +} + +/// \brief Convenience helper for adjusting to 64 bits. 
+inline std::pair getAdjusted64(uint64_t Digits, + int16_t Scale = 0) { + return getAdjusted(Digits, Scale); +} } } diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp index 1fa54ff81d75..dac24051a62d 100644 --- a/unittests/Support/ScaledNumberTest.cpp +++ b/unittests/Support/ScaledNumberTest.cpp @@ -57,4 +57,27 @@ TEST(ScaledNumberHelpersTest, getRounded) { EXPECT_EQ(getRounded(UINT64_MAX, 0, true), SP64(UINT64_C(1) << 63, 1)); } + +TEST(FloatsTest, getAdjusted) { + const uint64_t Max32In64 = UINT32_MAX; + EXPECT_EQ(getAdjusted32(0), SP32(0, 0)); + EXPECT_EQ(getAdjusted32(0, 5), SP32(0, 5)); + EXPECT_EQ(getAdjusted32(UINT32_MAX), SP32(UINT32_MAX, 0)); + EXPECT_EQ(getAdjusted32(Max32In64 << 1), SP32(UINT32_MAX, 1)); + EXPECT_EQ(getAdjusted32(Max32In64 << 1, 1), SP32(UINT32_MAX, 2)); + EXPECT_EQ(getAdjusted32(Max32In64 << 31), SP32(UINT32_MAX, 31)); + EXPECT_EQ(getAdjusted32(Max32In64 << 32), SP32(UINT32_MAX, 32)); + EXPECT_EQ(getAdjusted32(Max32In64 + 1), SP32(1u << 31, 1)); + EXPECT_EQ(getAdjusted32(UINT64_MAX), SP32(1u << 31, 33)); + + EXPECT_EQ(getAdjusted64(0), SP64(0, 0)); + EXPECT_EQ(getAdjusted64(0, 5), SP64(0, 5)); + EXPECT_EQ(getAdjusted64(UINT32_MAX), SP64(UINT32_MAX, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 << 1), SP64(Max32In64 << 1, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 << 1, 1), SP64(Max32In64 << 1, 1)); + EXPECT_EQ(getAdjusted64(Max32In64 << 31), SP64(Max32In64 << 31, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 << 32), SP64(Max32In64 << 32, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 + 1), SP64(Max32In64 + 1, 0)); + EXPECT_EQ(getAdjusted64(UINT64_MAX), SP64(UINT64_MAX, 0)); +} } From af08b8b820acc71b02e7e8e4d0de3679c7971773 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. 
Exon Smith" Date: Fri, 20 Jun 2014 02:31:07 +0000 Subject: [PATCH 0076/1155] Support: Clean up getRounded() tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211337 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/ScaledNumber.h | 12 ++++++++++++ unittests/Support/ScaledNumberTest.cpp | 26 ++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index 54b5386c5ac2..f6594ec6aa55 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -53,6 +53,18 @@ inline std::pair getRounded(DigitsT Digits, int16_t Scale, return std::make_pair(Digits, Scale); } +/// \brief Convenience helper for 32-bit rounding. +inline std::pair getRounded32(uint32_t Digits, int16_t Scale, + bool ShouldRound) { + return getRounded(Digits, Scale, ShouldRound); +} + +/// \brief Convenience helper for 64-bit rounding. +inline std::pair getRounded64(uint64_t Digits, int16_t Scale, + bool ShouldRound) { + return getRounded(Digits, Scale, ShouldRound); +} + /// \brief Adjust a 64-bit scaled number down to the appropriate width. 
/// /// Adjust a soft float with 64-bits of digits down, keeping as much diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp index dac24051a62d..d1277d0479a6 100644 --- a/unittests/Support/ScaledNumberTest.cpp +++ b/unittests/Support/ScaledNumberTest.cpp @@ -41,21 +41,19 @@ typedef ScaledPair SP32; typedef ScaledPair SP64; TEST(ScaledNumberHelpersTest, getRounded) { - EXPECT_EQ(getRounded(0, 0, false), SP32(0, 0)); - EXPECT_EQ(getRounded(0, 0, true), SP32(1, 0)); - EXPECT_EQ(getRounded(20, 21, true), SP32(21, 21)); - EXPECT_EQ(getRounded(UINT32_MAX, 0, false), SP32(UINT32_MAX, 0)); - EXPECT_EQ(getRounded(UINT32_MAX, 0, true), SP32(1 << 31, 1)); + EXPECT_EQ(getRounded32(0, 0, false), SP32(0, 0)); + EXPECT_EQ(getRounded32(0, 0, true), SP32(1, 0)); + EXPECT_EQ(getRounded32(20, 21, true), SP32(21, 21)); + EXPECT_EQ(getRounded32(UINT32_MAX, 0, false), SP32(UINT32_MAX, 0)); + EXPECT_EQ(getRounded32(UINT32_MAX, 0, true), SP32(1 << 31, 1)); - EXPECT_EQ(getRounded(0, 0, false), SP64(0, 0)); - EXPECT_EQ(getRounded(0, 0, true), SP64(1, 0)); - EXPECT_EQ(getRounded(20, 21, true), SP64(21, 21)); - EXPECT_EQ(getRounded(UINT32_MAX, 0, false), SP64(UINT32_MAX, 0)); - EXPECT_EQ(getRounded(UINT32_MAX, 0, true), - SP64(UINT64_C(1) << 32, 0)); - EXPECT_EQ(getRounded(UINT64_MAX, 0, false), SP64(UINT64_MAX, 0)); - EXPECT_EQ(getRounded(UINT64_MAX, 0, true), - SP64(UINT64_C(1) << 63, 1)); + EXPECT_EQ(getRounded64(0, 0, false), SP64(0, 0)); + EXPECT_EQ(getRounded64(0, 0, true), SP64(1, 0)); + EXPECT_EQ(getRounded64(20, 21, true), SP64(21, 21)); + EXPECT_EQ(getRounded64(UINT32_MAX, 0, false), SP64(UINT32_MAX, 0)); + EXPECT_EQ(getRounded64(UINT32_MAX, 0, true), SP64(UINT64_C(1) << 32, 0)); + EXPECT_EQ(getRounded64(UINT64_MAX, 0, false), SP64(UINT64_MAX, 0)); + EXPECT_EQ(getRounded64(UINT64_MAX, 0, true), SP64(UINT64_C(1) << 63, 1)); } TEST(FloatsTest, getAdjusted) { From d2ce9392dca6349a22c8f3e21e74aa59a82d8900 Mon Sep 17 00:00:00 2001 From: Karthik 
Bhat Date: Fri, 20 Jun 2014 04:32:48 +0000 Subject: [PATCH 0077/1155] Add Support to Recognize and Vectorize NON SIMD instructions in SLPVectorizer. This patch adds support to recognize patterns such as fadd,fsub,fadd,fsub.../add,sub,add,sub... and vectorizes them as vector shuffles if they are profitable. These patterns of vector shuffle can later be converted to instructions such as addsubpd etc on X86. Thanks to Arnold and Hal for the reviews. http://reviews.llvm.org/D4015 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211339 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/TargetTransformInfo.h | 1 + lib/CodeGen/BasicTargetTransformInfo.cpp | 23 +++ lib/Target/ARM/ARMTargetTransformInfo.cpp | 67 +++++--- lib/Target/X86/X86TargetTransformInfo.cpp | 46 ++++- lib/Transforms/Vectorize/SLPVectorizer.cpp | 165 ++++++++++++++++-- test/Transforms/SLPVectorizer/X86/addsub.ll | 181 ++++++++++++++++++++ 6 files changed, 441 insertions(+), 42 deletions(-) create mode 100644 test/Transforms/SLPVectorizer/X86/addsub.ll diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 79fe1dcae639..f57f3eb009a1 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -322,6 +322,7 @@ class TargetTransformInfo { enum ShuffleKind { SK_Broadcast, ///< Broadcast element 0 to all other elements. SK_Reverse, ///< Reverse the order of the vector. + SK_Alternate, ///< Choose alternate elements from vector. SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset. 
}; diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 7f31b1a982fc..b2737bf754f9 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -39,6 +39,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo { /// are set if the result needs to be inserted and/or extracted from vectors. unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + /// Estimate the cost overhead of SK_Alternate shuffle. + unsigned getAltShuffleOverhead(Type *Ty) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } public: @@ -327,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, return OpCost; } +unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { + assert(Ty->isVectorTy() && "Can only shuffle vectors"); + unsigned Cost = 0; + // Shuffle cost is equal to the cost of extracting element from its argument + // plus the cost of inserting them onto the result vector. + + // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index + // 0 of first vector, index 1 of second vector,index 2 of first vector and + // finally index 3 of second vector and insert them at index <0,1,2,3> of + // result vector. 
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + return Cost; +} + unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { + if (Kind == SK_Alternate) { + return getAltShuffleOverhead(Tp); + } return 1; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 57df7da7f310..a2ace629baa1 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -443,31 +443,58 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { - // We only handle costs of reverse shuffles for now. - if (Kind != SK_Reverse) + // We only handle costs of reverse and alternate shuffles for now. + if (Kind != SK_Reverse && Kind != SK_Alternate) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - static const CostTblEntry NEONShuffleTbl[] = { - // Reverse shuffle cost one instruction if we are shuffling within a double - // word (vrev) or two if we shuffle a quad word (vrev, vext). - { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, - - { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 } - }; + if (Kind == SK_Reverse) { + static const CostTblEntry NEONShuffleTbl[] = { + // Reverse shuffle cost one instruction if we are shuffling within a + // double word (vrev) or two if we shuffle a quad word (vrev, vext). 
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - std::pair LT = TLI->getTypeLegalizationCost(Tp); + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + std::pair LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - return LT.first * NEONShuffleTbl[Idx].Cost; + return LT.first * NEONShuffleTbl[Idx].Cost; + } + if (Kind == SK_Alternate) { + static const CostTblEntry NEONAltShuffleTbl[] = { + // Alt shuffle cost table for ARM. Cost is the number of instructions + // required to create the shuffled vector. 
+ + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, + + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; + + std::pair LT = TLI->getTypeLegalizationCost(Tp); + int Idx = + CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return LT.first * NEONAltShuffleTbl[Idx].Cost; + } + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 299f9a581b8d..9fa911967e7a 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -402,17 +402,47 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { - // We only estimate the cost of reverse shuffles. - if (Kind != SK_Reverse) + // We only estimate the cost of reverse and alternate shuffles. + if (Kind != SK_Reverse && Kind != SK_Alternate) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - std::pair LT = TLI->getTypeLegalizationCost(Tp); - unsigned Cost = 1; - if (LT.second.getSizeInBits() > 128) - Cost = 3; // Extract + insert + copy. + if (Kind == SK_Reverse) { + std::pair LT = TLI->getTypeLegalizationCost(Tp); + unsigned Cost = 1; + if (LT.second.getSizeInBits() > 128) + Cost = 3; // Extract + insert + copy. - // Multiple by the number of parts. - return Cost * LT.first; + // Multiple by the number of parts. 
+ return Cost * LT.first; + } + + if (Kind == SK_Alternate) { + static const CostTblEntry X86AltShuffleTbl[] = { + // Alt shuffle cost table for X86. Cost is the number of instructions + // required to create the shuffled vector. + + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 8}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, + + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 49}}; + + std::pair LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup(X86AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return LT.first * X86AltShuffleTbl[Idx].Cost; + } + + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index f18202be167e..53a43d9851e9 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -149,6 +149,48 @@ static bool isSplat(ArrayRef VL) { return true; } +///\returns Opcode that can be clubbed with \p Op to create an alternate +/// sequence which can later be merged as a ShuffleVector instruction. +static unsigned getAltOpcode(unsigned Op) { + switch (Op) { + case Instruction::FAdd: + return Instruction::FSub; + case Instruction::FSub: + return Instruction::FAdd; + case Instruction::Add: + return Instruction::Sub; + case Instruction::Sub: + return Instruction::Add; + default: + return 0; + } +} + +///\returns bool representing if Opcode \p Op can be part +/// of an alternate sequence which can later be merged as +/// a ShuffleVector instruction. 
+static bool canCombineAsAltInst(unsigned Op) { + if (Op == Instruction::FAdd || Op == Instruction::FSub || + Op == Instruction::Sub || Op == Instruction::Add) + return true; + return false; +} + +/// \returns ShuffleVector instruction if intructions in \p VL have +/// alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence. +/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...) +static unsigned isAltInst(ArrayRef VL) { + Instruction *I0 = dyn_cast(VL[0]); + unsigned Opcode = I0->getOpcode(); + unsigned AltOpcode = getAltOpcode(Opcode); + for (int i = 1, e = VL.size(); i < e; i++) { + Instruction *I = dyn_cast(VL[i]); + if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode)) + return 0; + } + return Instruction::ShuffleVector; +} + /// \returns The opcode if all of the Instructions in \p VL have the same /// opcode, or zero. static unsigned getSameOpcode(ArrayRef VL) { @@ -158,8 +200,11 @@ static unsigned getSameOpcode(ArrayRef VL) { unsigned Opcode = I0->getOpcode(); for (int i = 1, e = VL.size(); i < e; i++) { Instruction *I = dyn_cast(VL[i]); - if (!I || Opcode != I->getOpcode()) + if (!I || Opcode != I->getOpcode()) { + if (canCombineAsAltInst(Opcode) && i == 1) + return isAltInst(VL); return 0; + } } return Opcode; } @@ -377,6 +422,7 @@ class BoUpSLP { /// \brief Perform LICM and CSE on the newly generated gather sequences. void optimizeGatherSequence(); + private: struct TreeEntry; @@ -594,6 +640,7 @@ void BoUpSLP::buildTree(ArrayRef Roots, void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { bool SameTy = getSameType(VL); (void)SameTy; + bool isAltShuffle = false; assert(SameTy && "Invalid types!"); if (Depth == RecursionMaxDepth) { @@ -615,10 +662,19 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { newTreeEntry(VL, false); return; } + unsigned Opcode = getSameOpcode(VL); + + // Check that this shuffle vector refers to the alternate + // sequence of opcodes. 
+ if (Opcode == Instruction::ShuffleVector) { + Instruction *I0 = dyn_cast(VL[0]); + unsigned Op = I0->getOpcode(); + if (Op != Instruction::ShuffleVector) + isAltShuffle = true; + } // If all of the operands are identical or constant we have a simple solution. - if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || - !getSameOpcode(VL)) { + if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) { DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); newTreeEntry(VL, false); return; @@ -754,8 +810,6 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); - unsigned Opcode = getSameOpcode(VL); - // Check if it is safe to sink the loads or the stores. if (Opcode == Instruction::Load || Opcode == Instruction::Store) { Instruction *Last = getLastInstruction(VL); @@ -1057,6 +1111,26 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { } return; } + case Instruction::ShuffleVector: { + // If this is not an alternate sequence of opcode like add-sub + // then do not vectorize this instruction. + if (!isAltShuffle) { + newTreeEntry(VL, false); + DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); + return; + } + newTreeEntry(VL, true); + DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. 
+ for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast(VL[j])->getOperand(i)); + + buildTree_rec(Operands, Depth + 1); + } + return; + } default: newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); @@ -1080,11 +1154,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } return getGatherCost(E->Scalars); } - - assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) && - "Invalid VL"); + unsigned Opcode = getSameOpcode(VL); + assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL"); Instruction *VL0 = cast(VL[0]); - unsigned Opcode = VL0->getOpcode(); switch (Opcode) { case Instruction::PHI: { return 0; @@ -1242,6 +1314,32 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return VecCallCost - ScalarCallCost; } + case Instruction::ShuffleVector: { + TargetTransformInfo::OperandValueKind Op1VK = + TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OperandValueKind Op2VK = + TargetTransformInfo::OK_AnyValue; + int ScalarCost = 0; + int VecCost = 0; + for (unsigned i = 0; i < VL.size(); ++i) { + Instruction *I = cast(VL[i]); + if (!I) + break; + ScalarCost += + TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK); + } + // VecCost is equal to sum of the cost of creating 2 vectors + // and the cost of creating shuffle. 
+ Instruction *I0 = cast(VL[0]); + VecCost = + TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK); + Instruction *I1 = cast(VL[1]); + VecCost += + TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); + VecCost += + TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); + return VecCost - ScalarCost; + } default: llvm_unreachable("Unknown instruction"); } @@ -1522,9 +1620,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); return Gather(E->Scalars, VecTy); } - - unsigned Opcode = VL0->getOpcode(); - assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode"); + unsigned Opcode = getSameOpcode(E->Scalars); switch (Opcode) { case Instruction::PHI: { @@ -1797,6 +1893,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->VectorizedValue = V; return V; } + case Instruction::ShuffleVector: { + ValueList LHSVL, RHSVL; + for (int i = 0, e = E->Scalars.size(); i < e; ++i) { + LHSVL.push_back(cast(E->Scalars[i])->getOperand(0)); + RHSVL.push_back(cast(E->Scalars[i])->getOperand(1)); + } + setInsertPointAfterBundle(E->Scalars); + + Value *LHS = vectorizeTree(LHSVL); + Value *RHS = vectorizeTree(RHSVL); + + if (Value *V = alreadyVectorized(E->Scalars)) + return V; + + // Create a vector of LHS op1 RHS + BinaryOperator *BinOp0 = cast(VL0); + Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS); + + // Create a vector of LHS op2 RHS + Instruction *VL1 = cast(E->Scalars[1]); + BinaryOperator *BinOp1 = cast(VL1); + Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS); + + // Create appropriate shuffle to take alternative operations from + // the vector. 
+ std::vector Mask(E->Scalars.size()); + unsigned e = E->Scalars.size(); + for (unsigned i = 0; i < e; ++i) { + if (i & 1) + Mask[i] = Builder.getInt32(e + i); + else + Mask[i] = Builder.getInt32(i); + } + + Value *ShuffleMask = ConstantVector::get(Mask); + + Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask); + E->VectorizedValue = V; + if (Instruction *I = dyn_cast(V)) + return propagateMetadata(I, E->Scalars); + + return V; + } default: llvm_unreachable("unknown inst"); } @@ -1865,7 +2004,6 @@ Value *BoUpSLP::vectorizeTree() { // For each lane: for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; - // No need to handle users of gathered values. if (Entry->NeedToGather) continue; @@ -2049,7 +2187,6 @@ struct SLPVectorizer : public FunctionPass { for (po_iterator it = po_begin(&F.getEntryBlock()), e = po_end(&F.getEntryBlock()); it != e; ++it) { BasicBlock *BB = *it; - // Vectorize trees that end at stores. if (unsigned count = collectStores(BB, R)) { (void)count; diff --git a/test/Transforms/SLPVectorizer/X86/addsub.ll b/test/Transforms/SLPVectorizer/X86/addsub.ll new file mode 100644 index 000000000000..8303bc8181ef --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/addsub.ll @@ -0,0 +1,181 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@b = common global [4 x i32] zeroinitializer, align 16 +@c = common global [4 x i32] zeroinitializer, align 16 +@d = common global [4 x i32] zeroinitializer, align 16 +@e = common global [4 x i32] zeroinitializer, align 16 +@a = common global [4 x i32] zeroinitializer, align 16 +@fb = common global [4 x float] zeroinitializer, align 16 +@fc = common global [4 x float] zeroinitializer, align 16 +@fa = common global [4 x float] zeroinitializer, align 16 + +; CHECK-LABEL: @addsub +; CHECK: %5 = add <4 x i32> %3, %4 +; CHECK: %6 = add <4 x i32> 
%2, %5 +; CHECK: %7 = sub <4 x i32> %2, %5 +; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> + +; Function Attrs: nounwind uwtable +define void @addsub() #0 { +entry: + %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 + %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 + %add = add nsw i32 %0, %1 + %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 + %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 + %add1 = add nsw i32 %2, %3 + %add2 = add nsw i32 %add, %add1 + store i32 %add2, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 + %add3 = add nsw i32 %4, %5 + %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 + %add4 = add nsw i32 %6, %7 + %sub = sub nsw i32 %add3, %add4 + store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4 + %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 + %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 + %add5 = add nsw i32 %8, %9 + %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 + %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 + %add6 = add nsw i32 %10, %11 + %add7 = add nsw i32 %add5, %add6 + store i32 %add7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4 + %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 + %add8 = add nsw i32 %12, %13 + %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 + %15 = load i32* getelementptr inbounds ([4 x i32]* @e, 
i32 0, i64 3), align 4 + %add9 = add nsw i32 %14, %15 + %sub10 = sub nsw i32 %add8, %add9 + store i32 %sub10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @subadd +; CHECK: %5 = add <4 x i32> %3, %4 +; CHECK: %6 = sub <4 x i32> %2, %5 +; CHECK: %7 = add <4 x i32> %2, %5 +; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> + +; Function Attrs: nounwind uwtable +define void @subadd() #0 { +entry: + %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 + %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 + %add = add nsw i32 %0, %1 + %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 + %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 + %add1 = add nsw i32 %2, %3 + %sub = sub nsw i32 %add, %add1 + store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 + %add2 = add nsw i32 %4, %5 + %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 + %add3 = add nsw i32 %6, %7 + %add4 = add nsw i32 %add2, %add3 + store i32 %add4, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4 + %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 + %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 + %add5 = add nsw i32 %8, %9 + %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 + %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 + %add6 = add nsw i32 %10, %11 + %sub7 = sub nsw i32 %add5, %add6 + store i32 %sub7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4 + %12 = load i32* getelementptr inbounds ([4 x 
i32]* @b, i32 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 + %add8 = add nsw i32 %12, %13 + %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 + %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4 + %add9 = add nsw i32 %14, %15 + %add10 = add nsw i32 %add8, %add9 + store i32 %add10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @faddfsub +; CHECK: %2 = fadd <4 x float> %0, %1 +; CHECK: %3 = fsub <4 x float> %0, %1 +; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> +; Function Attrs: nounwind uwtable +define void @faddfsub() #0 { +entry: + %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %add = fadd float %0, %1 + store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %sub = fsub float %2, %3 + store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %add1 = fadd float %4, %5 + store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %sub2 = fsub float %6, %7 + store float %sub2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @fsubfadd +; CHECK: %2 = fsub <4 x float> %0, %1 +; CHECK: %3 = fadd <4 x float> %0, %1 +; 
CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> +; Function Attrs: nounwind uwtable +define void @fsubfadd() #0 { +entry: + %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %sub = fsub float %0, %1 + store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %add = fadd float %2, %3 + store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %sub1 = fsub float %4, %5 + store float %sub1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %add2 = fadd float %6, %7 + store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @No_faddfsub +; CHECK-NOT: fadd <4 x float> +; CHECK-NOT: fsub <4 x float> +; CHECK-NOT: shufflevector +; Function Attrs: nounwind uwtable +define void @No_faddfsub() #0 { +entry: + %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %add = fadd float %0, %1 + store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %add1 = fadd float %2, %3 + store 
float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %add2 = fadd float %4, %5 + store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %sub = fsub float %6, %7 + store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + ret void +} + +attributes #0 = { nounwind } + From 14d8c9e5502b58aead5d390416c3c4eea31841d1 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Fri, 20 Jun 2014 04:36:29 +0000 Subject: [PATCH 0078/1155] ArgList: use MakeArgList overloads in subclasses and clean up some calls. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211340 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Option/ArgList.h | 2 ++ lib/Option/ArgList.cpp | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index ab40a1a0d40a..af8cae15fc2d 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -328,6 +328,7 @@ class InputArgList : public ArgList { unsigned MakeIndex(StringRef String0) const; unsigned MakeIndex(StringRef String0, StringRef String1) const; + using ArgList::MakeArgString; const char *MakeArgString(StringRef Str) const override; /// @} @@ -365,6 +366,7 @@ class DerivedArgList : public ArgList { /// (to be freed). 
void AddSynthesizedArg(Arg *A); + using ArgList::MakeArgString; const char *MakeArgString(StringRef Str) const override; /// AddFlagArg - Construct a new FlagArg for the given option \p Id and diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index a5ab8d747d87..28b42a8f8290 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -350,30 +350,27 @@ void DerivedArgList::AddSynthesizedArg(Arg *A) { } Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const { - SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - BaseArgs.MakeIndex(Opt.getName()), BaseArg)); + SynthesizedArgs.push_back( + make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + BaseArgs.MakeIndex(Opt.getName()), BaseArg)); return SynthesizedArgs.back().get(); } Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Value); - SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index), BaseArg)); + SynthesizedArgs.push_back( + make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index), BaseArg)); return SynthesizedArgs.back().get(); } Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value); - SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index + 1), BaseArg)); + SynthesizedArgs.push_back( + make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index + 1), BaseArg)); return SynthesizedArgs.back().get(); } @@ -381,8 +378,7 @@ Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { 
unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str()); SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); + Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, + BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); return SynthesizedArgs.back().get(); } From 7d1f7798dbb2242bceb4b8578d4b0de0788088c4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 20 Jun 2014 06:50:05 +0000 Subject: [PATCH 0079/1155] R600: Trivial subtarget feature cleanups. Remove an unused AMDIL leftover, correct extra periods appearing in the help menu. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211341 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.td | 15 +++++---------- lib/Target/R600/AMDGPUSubtarget.h | 1 - 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index d3dff531a7f6..6ff9ab7ab7d2 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -32,30 +32,25 @@ def FeatureIfCvt : SubtargetFeature <"disable-ifcvt", "false", "Disable the if conversion pass">; -def FeatureFP64 : SubtargetFeature<"fp64", +def FeatureFP64 : SubtargetFeature<"fp64", "FP64", "true", - "Enable 64bit double precision operations">; + "Enable double precision operations">; def Feature64BitPtr : SubtargetFeature<"64BitPtr", "Is64bit", "true", - "Specify if 64bit addressing should be used.">; - -def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr", - "Is32on64bit", - "false", - "Specify if 64bit sized pointers with 32bit addressing should be used.">; + "Specify if 64-bit addressing should be used">; def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", "R600ALUInst", "false", - "Older version of ALU instructions encoding.">; + "Older version of ALU instructions encoding">; def FeatureVertexCache : 
SubtargetFeature<"HasVertexCache", "HasVertexCache", "true", - "Specify use of dedicated vertex cache.">; + "Specify use of dedicated vertex cache">; def FeatureCaymanISA : SubtargetFeature<"caymanISA", "CaymanISA", diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 9c78f35df3e2..bbbd997f6af8 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -44,7 +44,6 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { private: std::string DevName; bool Is64bit; - bool Is32on64bit; bool DumpCode; bool R600ALUInst; bool HasVertexCache; From a5efeb6b39304f5d34d74a5e32e5142d84eb2a43 Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Fri, 20 Jun 2014 09:28:09 +0000 Subject: [PATCH 0080/1155] ps][mips64r6] Added LSA/DLSA instructions Differential Revision: http://reviews.llvm.org/D3897 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211346 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips32r6InstrFormats.td | 19 +++++++++++++++++++ lib/Target/Mips/Mips32r6InstrInfo.td | 14 +++++++++++++- lib/Target/Mips/Mips64r6InstrInfo.td | 4 +++- test/CodeGen/Mips/msa/special.ll | 4 ++++ test/MC/Mips/mips32r6/valid.s | 1 + test/MC/Mips/mips64r6/valid.s | 2 ++ 6 files changed, 42 insertions(+), 2 deletions(-) diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td index 051db7525f6d..9b7ada68ee26 100644 --- a/lib/Target/Mips/Mips32r6InstrFormats.td +++ b/lib/Target/Mips/Mips32r6InstrFormats.td @@ -95,6 +95,8 @@ def OPCODE6_CLO : OPCODE6<0b010001>; def OPCODE6_CLZ : OPCODE6<0b010000>; def OPCODE6_DCLO : OPCODE6<0b010011>; def OPCODE6_DCLZ : OPCODE6<0b010010>; +def OPCODE6_LSA : OPCODE6<0b000101>; +def OPCODE6_DLSA : OPCODE6<0b010101>; class FIELD_FMT Val> { bits<5> Value = Val; @@ -453,6 +455,23 @@ class SPECIAL3_LL_SC_FM : MipsR6Inst { string DecoderMethod = "DecodeSpecial3LlSc"; } +class SPECIAL_LSA_FM : MipsR6Inst { + bits<5> rd; + bits<5> rs; + bits<5> 
rt; + bits<2> imm2; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL.Value; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-8} = 0b000; + let Inst{7-6} = imm2; + let Inst{5-0} = Operation.Value; +} + class REGIMM_FM : MipsR6Inst { bits<5> rs; bits<16> imm; diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 52e12fcf9462..9ec41a20b394 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -160,6 +160,8 @@ class LWC2_R6_ENC : COP2LDST_FM; class SDC2_R6_ENC : COP2LDST_FM; class SWC2_R6_ENC : COP2LDST_FM; +class LSA_R6_ENC : SPECIAL_LSA_FM; + class LL_R6_ENC : SPECIAL3_LL_SC_FM; class SC_R6_ENC : SPECIAL3_LL_SC_FM; @@ -574,6 +576,16 @@ class COP2ST_DESC_BASE { class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>; class SWC2_R6_DESC : COP2ST_DESC_BASE<"swc2", COP2Opnd>; +class LSA_R6_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $imm2"); + list Pattern = []; +} + +class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>; + class LL_R6_DESC_BASE { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins mem_simm9:$addr); @@ -667,7 +679,7 @@ def JIC : JIC_ENC, JIC_DESC, ISA_MIPS32R6; def JR_HB_R6 : JR_HB_R6_ENC, JR_HB_R6_DESC, ISA_MIPS32R6; def LDC2_R6 : LDC2_R6_ENC, LDC2_R6_DESC, ISA_MIPS32R6; def LL_R6 : LL_R6_ENC, LL_R6_DESC, ISA_MIPS32R6; -// def LSA; // See MSA +def LSA_R6 : LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6; def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6; def LWPC : LWPC_ENC, LWPC_DESC, ISA_MIPS32R6; def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index dbfb9fbbf9bc..96d111f64b26 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -29,6 +29,7 
@@ class DCLO_R6_ENC : SPECIAL_2R_FM; class DCLZ_R6_ENC : SPECIAL_2R_FM; class DDIV_ENC : SPECIAL_3R_FM<0b00010, 0b011110>; class DDIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011111>; +class DLSA_R6_ENC : SPECIAL_LSA_FM; class DMOD_ENC : SPECIAL_3R_FM<0b00011, 0b011110>; class DMODU_ENC : SPECIAL_3R_FM<0b00011, 0b011111>; class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b111000>; @@ -61,6 +62,7 @@ class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>; class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>; class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd, sdiv>; class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>; +class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>; class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>; class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>; class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>; @@ -88,7 +90,7 @@ def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6; def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6; def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6; def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6; -// def DLSA; // See MSA +def DLSA_R6 : DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6; def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6; def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6; def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6; diff --git a/test/CodeGen/Mips/msa/special.ll b/test/CodeGen/Mips/msa/special.ll index f65a14f7bb1f..b9badf5dc582 100644 --- a/test/CodeGen/Mips/msa/special.ll +++ b/test/CodeGen/Mips/msa/special.ll @@ -4,6 +4,10 @@ ; RUN: FileCheck %s --check-prefix=MIPS32 ; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \ ; RUN: FileCheck %s --check-prefix=MIPS64 +; RUN: llc -march=mips -mcpu=mips32r6 -mattr=+msa < %s | \ +; RUN: FileCheck %s --check-prefix=MIPS32 +; RUN: llc -march=mips64 -mcpu=mips64r6 -mattr=+msa < %s | \ +; RUN: FileCheck %s --check-prefix=MIPS64 define i32 @llvm_mips_lsa_test(i32 %a, i32 %b) nounwind { entry: diff --git 
a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s index 0766079fda5b..e6a6a6636158 100644 --- a/test/MC/Mips/mips32r6/valid.s +++ b/test/MC/Mips/mips32r6/valid.s @@ -100,6 +100,7 @@ divu $2,$3,$4 # CHECK: divu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9b] jialc $5, 256 # CHECK: jialc $5, 256 # encoding: [0xf8,0x05,0x01,0x00] jic $5, 256 # CHECK: jic $5, 256 # encoding: [0xd8,0x05,0x01,0x00] + lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5] lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43] lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43] mod $2,$3,$4 # CHECK: mod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xda] diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s index 9f32562191a4..1148a5246609 100644 --- a/test/MC/Mips/mips64r6/valid.s +++ b/test/MC/Mips/mips64r6/valid.s @@ -111,6 +111,8 @@ ddivu $2,$3,$4 # CHECK: ddivu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9f] dmod $2,$3,$4 # CHECK: dmod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xde] dmodu $2,$3,$4 # CHECK: dmodu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdf] + lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5] + dlsa $2, $3, $4, 3 # CHECK: dlsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xd5] ldpc $2,123456 # CHECK: ldpc $2, 123456 # encoding: [0xec,0x58,0x3c,0x48] lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43] lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43] From e5241cc48858af7c6ca58ae677b69323e8f84eb5 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Fri, 20 Jun 2014 10:08:11 +0000 Subject: [PATCH 0081/1155] Emit the ARM build attributes ABI_PCS_wchar_t and ABI_enum_size. Emit the ARM build attributes ABI_PCS_wchar_t and ABI_enum_size based on module flags metadata. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211349 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 36 +++++++++++++++++++++++ include/llvm/Support/ARMBuildAttributes.h | 15 ++++++++++ lib/Target/ARM/ARMAsmPrinter.cpp | 26 ++++++++++++++++ test/CodeGen/ARM/metadata-default.ll | 16 ++++++++++ test/CodeGen/ARM/metadata-short-enums.ll | 16 ++++++++++ test/CodeGen/ARM/metadata-short-wchar.ll | 16 ++++++++++ 6 files changed, 125 insertions(+) create mode 100644 test/CodeGen/ARM/metadata-default.ll create mode 100644 test/CodeGen/ARM/metadata-short-enums.ll create mode 100644 test/CodeGen/ARM/metadata-short-wchar.ll diff --git a/docs/LangRef.rst b/docs/LangRef.rst index b75058fa9a5f..c535443e2c57 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -3146,6 +3146,42 @@ Each individual option is required to be either a valid option for the target's linker, or an option that is reserved by the target specific assembly writer or object file emitter. No other aspect of these options is defined by the IR. +C type width Module Flags Metadata +---------------------------------- + +The ARM backend emits a section into each generated object file describing the +options that it was compiled with (in a compiler-independent way) to prevent +linking incompatible objects, and to allow automatic library selection. Some +of these options are not visible at the IR level, namely wchar_t width and enum +width. + +To pass this information to the backend, these options are encoded in module +flags metadata, using the following key-value pairs: + +.. list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Key + - Value + + * - short_wchar + - * 0 --- sizeof(wchar_t) == 4 + * 1 --- sizeof(wchar_t) == 2 + + * - short_enum + - * 0 --- Enums are at least as large as an ``int``. + * 1 --- Enums are stored in the smallest integer type which can + represent all of its values. 
+ +For example, the following metadata section specifies that the module was +compiled with a ``wchar_t`` width of 4 bytes, and the underlying type of an +enum is the smallest type which can represent all of its values:: + + !llvm.module.flags = !{!0, !1} + !0 = metadata !{i32 1, metadata !"short_wchar", i32 1} + !1 = metadata !{i32 1, metadata !"short_enum", i32 0} + .. _intrinsicglobalvariables: Intrinsic Global Variables diff --git a/include/llvm/Support/ARMBuildAttributes.h b/include/llvm/Support/ARMBuildAttributes.h index 16312004c871..f63e0a61f639 100644 --- a/include/llvm/Support/ARMBuildAttributes.h +++ b/include/llvm/Support/ARMBuildAttributes.h @@ -159,6 +159,11 @@ enum { AddressDirect = 1, // Address imported data directly AddressGOT = 2, // Address imported data indirectly (via GOT) + // Tag_ABI_PCS_wchar_t, (=18), uleb128 + WCharProhibited = 0, // wchar_t is not used + WCharWidth2Bytes = 2, // sizeof(wchar_t) == 2 + WCharWidth4Bytes = 4, // sizeof(wchar_t) == 4 + // Tag_ABI_FP_denormal, (=20), uleb128 PreserveFPSign = 2, // sign when flushed-to-zero is preserved @@ -166,6 +171,16 @@ enum { AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI]) AllowIEE754 = 3, // this code to use all the IEEE 754-defined FP encodings + // Tag_ABI_enum_size, (=26), uleb128 + EnumProhibited = 0, // The user prohibited the use of enums when building + // this entity. + EnumSmallest = 1, // Enum is smallest container big enough to hold all + // values. + Enum32Bit = 2, // Enum is at least 32 bits. + Enum32BitABI = 3, // Every enumeration visible across an ABI-complying + // interface contains a value needing 32 bits to encode + // it; other enums can be containerized. 
+ // Tag_ABI_HardFP_use, (=27), uleb128 HardFPImplied = 0, // FP use should be implied by Tag_FP_arch HardFPSinglePrecision = 1, // Single-precision only diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 55e9fe5f5c57..d2c54f3b71b6 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -730,6 +730,32 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + if (MMI) { + if (const Module *SourceModule = MMI->getModule()) { + // ABI_PCS_wchar_t to indicate wchar_t width + // FIXME: There is no way to emit value 0 (wchar_t prohibited). + if (auto WCharWidthValue = cast_or_null( + SourceModule->getModuleFlag("wchar_size"))) { + int WCharWidth = WCharWidthValue->getZExtValue(); + assert((WCharWidth == 2 || WCharWidth == 4) && + "wchar_t width must be 2 or 4 bytes"); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth); + } + + // ABI_enum_size to indicate enum width + // FIXME: There is no way to emit value 0 (enums prohibited) or value 3 + // (all enums contain a value needing 32 bits to encode). + if (auto EnumWidthValue = cast_or_null( + SourceModule->getModuleFlag("min_enum_size"))) { + int EnumWidth = EnumWidthValue->getZExtValue(); + assert((EnumWidth == 1 || EnumWidth == 4) && + "Minimum enum width must be 1 or 4 bytes"); + int EnumBuildAttr = EnumWidth == 1 ? 
1 : 2; + ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr); + } + } + } + if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZVirtualization); diff --git a/test/CodeGen/ARM/metadata-default.ll b/test/CodeGen/ARM/metadata-default.ll new file mode 100644 index 000000000000..f6a3fe289cc1 --- /dev/null +++ b/test/CodeGen/ARM/metadata-default.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--none-eabi" + +define i32 @f(i64 %z) { + ret i32 0 +} + +!llvm.module.flags = !{!0, !1} + +!0 = metadata !{i32 1, metadata !"wchar_size", i32 4} +!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4} + +; CHECK: .eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t +; CHECK: .eabi_attribute 26, 2 @ Tag_ABI_enum_size diff --git a/test/CodeGen/ARM/metadata-short-enums.ll b/test/CodeGen/ARM/metadata-short-enums.ll new file mode 100644 index 000000000000..bccd3327e5b5 --- /dev/null +++ b/test/CodeGen/ARM/metadata-short-enums.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--none-eabi" + +define i32 @f(i64 %z) { + ret i32 0 +} + +!llvm.module.flags = !{!0, !1} + +!0 = metadata !{i32 1, metadata !"wchar_size", i32 4} +!1 = metadata !{i32 1, metadata !"min_enum_size", i32 1} + +; CHECK: .eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t +; CHECK: .eabi_attribute 26, 1 @ Tag_ABI_enum_size diff --git a/test/CodeGen/ARM/metadata-short-wchar.ll b/test/CodeGen/ARM/metadata-short-wchar.ll new file mode 100644 index 000000000000..6de9bf174317 --- /dev/null +++ b/test/CodeGen/ARM/metadata-short-wchar.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--none-eabi" + +define i32 @f(i64 %z) { + ret i32 0 +} + 
+!llvm.module.flags = !{!0, !1} + +!0 = metadata !{i32 1, metadata !"wchar_size", i32 2} +!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4} + +; CHECK: .eabi_attribute 18, 2 @ Tag_ABI_PCS_wchar_t +; CHECK: .eabi_attribute 26, 2 @ Tag_ABI_enum_size From ed4e8a839f0604b51eb03f47b6cd467917162e89 Mon Sep 17 00:00:00 2001 From: Yaron Keren Date: Fri, 20 Jun 2014 10:26:56 +0000 Subject: [PATCH 0082/1155] The count() function for STL datatypes returns unsigned, even where it's only 1/0 result like std::set. Some of the LLVM ADT already return unsigned count(), while others still return bool count(). In continuation to r197879, this patch modifies DenseMap, DenseSet, ScopedHashTable, ValueMap:: count() to return size_type instead of bool, 1 instead of true and 0 instead of false. size_type is typedef-ed locally within each class to size_t. http://reviews.llvm.org/D4018 Reviewed by dblaikie. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211350 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/BitVector.h | 5 +++-- include/llvm/ADT/DenseMap.h | 9 +++++---- include/llvm/ADT/DenseSet.h | 6 ++++-- include/llvm/ADT/MapVector.h | 6 +++--- include/llvm/ADT/ScopedHashTable.h | 4 +++- include/llvm/ADT/SmallBitVector.h | 5 +++-- include/llvm/ADT/SmallPtrSet.h | 5 +++-- include/llvm/ADT/SmallSet.h | 5 +++-- include/llvm/ADT/SparseBitVector.h | 3 ++- include/llvm/ADT/SparseMultiSet.h | 5 +++-- include/llvm/ADT/SparseSet.h | 5 +++-- include/llvm/IR/ValueMap.h | 9 +++++---- lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 2 +- lib/IR/Metadata.cpp | 2 +- 14 files changed, 42 insertions(+), 29 deletions(-) diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index da2b3ad7e7c8..2fa181734bbd 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -34,6 +34,7 @@ class BitVector { unsigned Capacity; // Size of allocated memory in BitWord. public: + typedef size_t size_type; // Encapsulation of a single bit. 
class reference { friend class BitVector; @@ -111,10 +112,10 @@ class BitVector { bool empty() const { return Size == 0; } /// size - Returns the number of bits in this bitvector. - unsigned size() const { return Size; } + size_type size() const { return Size; } /// count - Returns the number of bits which are set. - unsigned count() const { + size_type count() const { unsigned NumBits = 0; for (unsigned i = 0; i < NumBitWords(size()); ++i) if (sizeof(BitWord) == 4) diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index 826913289e99..28a7b68a219a 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -43,6 +43,7 @@ class DenseMapBase { typedef std::pair BucketT; public: + typedef size_t size_type; typedef KeyT key_type; typedef ValueT mapped_type; typedef BucketT value_type; @@ -70,7 +71,7 @@ class DenseMapBase { unsigned size() const { return getNumEntries(); } /// Grow the densemap so that it has at least Size buckets. Does not shrink - void resize(size_t Size) { + void resize(size_type Size) { if (Size > getNumBuckets()) grow(Size); } @@ -99,10 +100,10 @@ class DenseMapBase { setNumTombstones(0); } - /// count - Return true if the specified key is in the map. - bool count(const KeyT &Val) const { + /// Return 1 if the specified key is in the map, 0 otherwise. + size_type count(const KeyT &Val) const { const BucketT *TheBucket; - return LookupBucketFor(Val, TheBucket); + return LookupBucketFor(Val, TheBucket) ? 
1 : 0; } iterator find(const KeyT &Val) { diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h index 1d8c39c1441b..d74c6a89b211 100644 --- a/include/llvm/ADT/DenseSet.h +++ b/include/llvm/ADT/DenseSet.h @@ -29,11 +29,12 @@ class DenseSet { public: typedef ValueT key_type; typedef ValueT value_type; + typedef size_t size_type; explicit DenseSet(unsigned NumInitBuckets = 0) : TheMap(NumInitBuckets) {} bool empty() const { return TheMap.empty(); } - unsigned size() const { return TheMap.size(); } + size_type size() const { return TheMap.size(); } size_t getMemorySize() const { return TheMap.getMemorySize(); } /// Grow the DenseSet so that it has at least Size buckets. Will not shrink @@ -44,7 +45,8 @@ class DenseSet { TheMap.clear(); } - bool count(const ValueT &V) const { + /// Return 1 if the specified key is in the set, 0 otherwise. + size_type count(const ValueT &V) const { return TheMap.count(V); } diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h index 8f8b7ba2d7ba..2eae22c56659 100644 --- a/include/llvm/ADT/MapVector.h +++ b/include/llvm/ADT/MapVector.h @@ -29,7 +29,7 @@ template, typename VectorType = std::vector > > class MapVector { - typedef typename VectorType::size_type SizeType; + typedef typename VectorType::size_type size_type; MapType Map; VectorType Vector; @@ -38,7 +38,7 @@ class MapVector { typedef typename VectorType::iterator iterator; typedef typename VectorType::const_iterator const_iterator; - SizeType size() const { + size_type size() const { return Vector.size(); } @@ -100,7 +100,7 @@ class MapVector { return std::make_pair(begin() + I, false); } - unsigned count(const KeyT &Key) const { + size_type count(const KeyT &Key) const { typename MapType::const_iterator Pos = Map.find(Key); return Pos == Map.end()? 
0 : 1; } diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h index 3cc7738df8aa..a9ff70e204a0 100644 --- a/include/llvm/ADT/ScopedHashTable.h +++ b/include/llvm/ADT/ScopedHashTable.h @@ -148,6 +148,7 @@ class ScopedHashTable { /// ScopeTy - This is a helpful typedef that allows clients to get easy access /// to the name of the scope for this hash table. typedef ScopedHashTableScope ScopeTy; + typedef size_t size_type; private: typedef ScopedHashTableVal ValTy; DenseMap TopLevelMap; @@ -170,7 +171,8 @@ class ScopedHashTable { AllocatorTy &getAllocator() { return Allocator; } const AllocatorTy &getAllocator() const { return Allocator; } - bool count(const K &Key) const { + /// Return 1 if the specified key is in the table, 0 otherwise. + size_type count(const K &Key) const { return TopLevelMap.count(Key); } diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index e965bc464326..18c68fdd4d24 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -54,6 +54,7 @@ class SmallBitVector { }; public: + typedef size_t size_type; // Encapsulation of a single bit. class reference { SmallBitVector &TheVector; @@ -168,12 +169,12 @@ class SmallBitVector { } /// size - Returns the number of bits in this bitvector. - size_t size() const { + size_type size() const { return isSmall() ? getSmallSize() : getPointer()->size(); } /// count - Returns the number of bits which are set. 
- unsigned count() const { + size_type count() const { if (isSmall()) { uintptr_t Bits = getSmallBits(); if (NumBaseBits == 32) diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index 67104f3cae50..45b2256bdf70 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -73,8 +73,9 @@ class SmallPtrSetImplBase { ~SmallPtrSetImplBase(); public: + typedef size_t size_type; bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const { return size() == 0; } - unsigned size() const { return NumElements; } + size_type size() const { return NumElements; } void clear() { // If the capacity of the array is huge, and the # elements used is small, @@ -263,7 +264,7 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase { } /// count - Return 1 if the specified pointer is in the set, 0 otherwise. - unsigned count(PtrType Ptr) const { + size_type count(PtrType Ptr) const { return count_imp(PtrTraits::getAsVoidPointer(Ptr)) ? 1 : 0; } diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h index 6f36234cb4dd..bb1971eb7c5d 100644 --- a/include/llvm/ADT/SmallSet.h +++ b/include/llvm/ADT/SmallSet.h @@ -37,18 +37,19 @@ class SmallSet { typedef typename SmallVector::const_iterator VIterator; typedef typename SmallVector::iterator mutable_iterator; public: + typedef size_t size_type; SmallSet() {} bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const { return Vector.empty() && Set.empty(); } - unsigned size() const { + size_type size() const { return isSmall() ? Vector.size() : Set.size(); } /// count - Return 1 if the element is in the set, 0 otherwise. - unsigned count(const T &V) const { + size_type count(const T &V) const { if (isSmall()) { // Since the collection is small, just do a linear search. return vfind(V) == Vector.end() ? 
0 : 1; diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h index 706f24862264..16106a7d6304 100644 --- a/include/llvm/ADT/SparseBitVector.h +++ b/include/llvm/ADT/SparseBitVector.h @@ -45,6 +45,7 @@ struct SparseBitVectorElement : public ilist_node > { public: typedef unsigned long BitWord; + typedef size_t size_type; enum { BITWORD_SIZE = sizeof(BitWord) * CHAR_BIT, BITWORDS_PER_ELEMENT = (ElementSize + BITWORD_SIZE - 1) / BITWORD_SIZE, @@ -120,7 +121,7 @@ struct SparseBitVectorElement return Bits[Idx / BITWORD_SIZE] & (1L << (Idx % BITWORD_SIZE)); } - unsigned count() const { + size_type count() const { unsigned NumBits = 0; for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) if (sizeof(BitWord) == 4) diff --git a/include/llvm/ADT/SparseMultiSet.h b/include/llvm/ADT/SparseMultiSet.h index d2b2f8d9b6bd..92b9d835b70c 100644 --- a/include/llvm/ADT/SparseMultiSet.h +++ b/include/llvm/ADT/SparseMultiSet.h @@ -185,6 +185,7 @@ class SparseMultiSet { typedef const ValueT &const_reference; typedef ValueT *pointer; typedef const ValueT *const_pointer; + typedef size_t size_type; SparseMultiSet() : Sparse(nullptr), Universe(0), FreelistIdx(SMSNode::INVALID), NumFree(0) {} @@ -327,7 +328,7 @@ class SparseMultiSet { /// This is not the same as BitVector::size() which returns the size of the /// universe. /// - unsigned size() const { + size_type size() const { assert(NumFree <= Dense.size() && "Out-of-bounds free entries"); return Dense.size() - NumFree; } @@ -378,7 +379,7 @@ class SparseMultiSet { /// Returns the number of elements identified by Key. This will be linear in /// the number of elements of that key. 
- unsigned count(const KeyT &Key) const { + size_type count(const KeyT &Key) const { unsigned Ret = 0; for (const_iterator It = find(Key); It != end(); ++It) ++Ret; diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h index 899f2e4da03c..e347667b4dc8 100644 --- a/include/llvm/ADT/SparseSet.h +++ b/include/llvm/ADT/SparseSet.h @@ -124,6 +124,7 @@ class SparseSet { typedef typename KeyFunctorT::argument_type KeyT; typedef SmallVector DenseT; + typedef size_t size_type; DenseT Dense; SparseT *Sparse; unsigned Universe; @@ -186,7 +187,7 @@ class SparseSet { /// This is not the same as BitVector::size() which returns the size of the /// universe. /// - unsigned size() const { return Dense.size(); } + size_type size() const { return Dense.size(); } /// clear - Clears the set. This is a very fast constant time operation. /// @@ -231,7 +232,7 @@ class SparseSet { /// count - Returns 1 if this set contains an element identified by Key, /// 0 otherwise. /// - unsigned count(const KeyT &Key) const { + size_type count(const KeyT &Key) const { return find(Key) == end() ? 0 : 1; } diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index 17b0fe01b116..e459e4904c60 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -87,6 +87,7 @@ class ValueMap { typedef KeyT key_type; typedef ValueT mapped_type; typedef std::pair value_type; + typedef size_t size_type; explicit ValueMap(unsigned NumInitBuckets = 64) : Map(NumInitBuckets), Data() {} @@ -103,16 +104,16 @@ class ValueMap { inline const_iterator end() const { return const_iterator(Map.end()); } bool empty() const { return Map.empty(); } - unsigned size() const { return Map.size(); } + size_type size() const { return Map.size(); } /// Grow the map so that it has at least Size buckets. Does not shrink void resize(size_t Size) { Map.resize(Size); } void clear() { Map.clear(); } - /// count - Return true if the specified key is in the map. 
- bool count(const KeyT &Val) const { - return Map.find_as(Val) != Map.end(); + /// Return 1 if the specified key is in the map, 0 otherwise. + size_type count(const KeyT &Val) const { + return Map.find_as(Val) == Map.end() ? 0 : 1; } iterator find(const KeyT &Val) { diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 2212941861b8..6a5c431d4fd1 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -308,7 +308,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { return; const Function *GV = MF->getFunction(); - assert(FnDebugInfo.count(GV) == true); + assert(FnDebugInfo.count(GV)); assert(CurFn == &FnDebugInfo[GV]); if (CurFn->Instrs.empty()) { diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 4d932d03962b..59137e47fa02 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -663,7 +663,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { // Otherwise, we're removing metadata from an instruction. assert((hasMetadataHashEntry() == - getContext().pImpl->MetadataStore.count(this)) && + (getContext().pImpl->MetadataStore.count(this) > 0)) && "HasMetadata bit out of date!"); if (!hasMetadataHashEntry()) return; // Nothing to remove! From 7d5bff0d5a236a5e3a6f6a159d2a6607fb76368a Mon Sep 17 00:00:00 2001 From: Yaron Keren Date: Fri, 20 Jun 2014 10:52:57 +0000 Subject: [PATCH 0083/1155] Attempting to fix the 64 bit bots. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211351 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/BitVector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index 2fa181734bbd..34e2284311b3 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -34,7 +34,7 @@ class BitVector { unsigned Capacity; // Size of allocated memory in BitWord. 
public: - typedef size_t size_type; + typedef unsigned size_type; // Encapsulation of a single bit. class reference { friend class BitVector; From 212e2b9c8dc74b16bd6297243c18064bae9ea113 Mon Sep 17 00:00:00 2001 From: Yaron Keren Date: Fri, 20 Jun 2014 12:20:56 +0000 Subject: [PATCH 0084/1155] Reverting size_type for the containers from size_type to unsigned. Various places in LLVM assume that container size and count are unsigned and do not use the container size_type. Therefore they break compilation (or possibly executation) for LP64 systems where size_t is 64 bit while unsigned is still 32 bit. If we'll ever that many items in the container size_type could be made size_t for a specific containers after reviweing its other uses. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211353 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/DenseMap.h | 2 +- include/llvm/ADT/DenseSet.h | 2 +- include/llvm/ADT/ScopedHashTable.h | 2 +- include/llvm/ADT/SmallBitVector.h | 2 +- include/llvm/ADT/SmallPtrSet.h | 2 +- include/llvm/ADT/SparseBitVector.h | 2 +- include/llvm/ADT/SparseMultiSet.h | 2 +- include/llvm/ADT/SparseSet.h | 2 +- include/llvm/IR/ValueMap.h | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index 28a7b68a219a..85f37b9051b1 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -43,7 +43,7 @@ class DenseMapBase { typedef std::pair BucketT; public: - typedef size_t size_type; + typedef unsigned size_type; typedef KeyT key_type; typedef ValueT mapped_type; typedef BucketT value_type; diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h index d74c6a89b211..37a81b0c7ee2 100644 --- a/include/llvm/ADT/DenseSet.h +++ b/include/llvm/ADT/DenseSet.h @@ -29,7 +29,7 @@ class DenseSet { public: typedef ValueT key_type; typedef ValueT value_type; - typedef size_t size_type; + typedef unsigned size_type; explicit DenseSet(unsigned 
NumInitBuckets = 0) : TheMap(NumInitBuckets) {} diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h index a9ff70e204a0..02a6ea345834 100644 --- a/include/llvm/ADT/ScopedHashTable.h +++ b/include/llvm/ADT/ScopedHashTable.h @@ -148,7 +148,7 @@ class ScopedHashTable { /// ScopeTy - This is a helpful typedef that allows clients to get easy access /// to the name of the scope for this hash table. typedef ScopedHashTableScope ScopeTy; - typedef size_t size_type; + typedef unsigned size_type; private: typedef ScopedHashTableVal ValTy; DenseMap TopLevelMap; diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index 18c68fdd4d24..1e49e27001cf 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -54,7 +54,7 @@ class SmallBitVector { }; public: - typedef size_t size_type; + typedef unsigned size_type; // Encapsulation of a single bit. class reference { SmallBitVector &TheVector; diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index 45b2256bdf70..74f3fd43cec4 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -73,7 +73,7 @@ class SmallPtrSetImplBase { ~SmallPtrSetImplBase(); public: - typedef size_t size_type; + typedef unsigned size_type; bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const { return size() == 0; } size_type size() const { return NumElements; } diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h index 16106a7d6304..36754d682355 100644 --- a/include/llvm/ADT/SparseBitVector.h +++ b/include/llvm/ADT/SparseBitVector.h @@ -45,7 +45,7 @@ struct SparseBitVectorElement : public ilist_node > { public: typedef unsigned long BitWord; - typedef size_t size_type; + typedef unsigned size_type; enum { BITWORD_SIZE = sizeof(BitWord) * CHAR_BIT, BITWORDS_PER_ELEMENT = (ElementSize + BITWORD_SIZE - 1) / BITWORD_SIZE, diff --git a/include/llvm/ADT/SparseMultiSet.h 
b/include/llvm/ADT/SparseMultiSet.h index 92b9d835b70c..dc1273eb7ff6 100644 --- a/include/llvm/ADT/SparseMultiSet.h +++ b/include/llvm/ADT/SparseMultiSet.h @@ -185,7 +185,7 @@ class SparseMultiSet { typedef const ValueT &const_reference; typedef ValueT *pointer; typedef const ValueT *const_pointer; - typedef size_t size_type; + typedef unsigned size_type; SparseMultiSet() : Sparse(nullptr), Universe(0), FreelistIdx(SMSNode::INVALID), NumFree(0) {} diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h index e347667b4dc8..632d52ad9d82 100644 --- a/include/llvm/ADT/SparseSet.h +++ b/include/llvm/ADT/SparseSet.h @@ -124,7 +124,7 @@ class SparseSet { typedef typename KeyFunctorT::argument_type KeyT; typedef SmallVector DenseT; - typedef size_t size_type; + typedef unsigned size_type; DenseT Dense; SparseT *Sparse; unsigned Universe; diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index e459e4904c60..3d575ac874d9 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -87,7 +87,7 @@ class ValueMap { typedef KeyT key_type; typedef ValueT mapped_type; typedef std::pair value_type; - typedef size_t size_type; + typedef unsigned size_type; explicit ValueMap(unsigned NumInitBuckets = 64) : Map(NumInitBuckets), Data() {} From 729aa1dbd32520b18007afed04070bdf3435407f Mon Sep 17 00:00:00 2001 From: Yaron Keren Date: Fri, 20 Jun 2014 12:57:44 +0000 Subject: [PATCH 0085/1155] Code in LoopStrengthReduce.cpp depends on SmallBitVector::size() being size_t and not unsigned. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211356 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/SmallBitVector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index 1e49e27001cf..0922017ea61a 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -169,7 +169,7 @@ class SmallBitVector { } /// size - Returns the number of bits in this bitvector. - size_type size() const { + size_t size() const { return isSmall() ? getSmallSize() : getPointer()->size(); } From 61b696f1fa894a3a946fe12ec4b1ec4fd6580a0e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 13:11:28 +0000 Subject: [PATCH 0086/1155] Allow a target to create a null streamer. Targets can assume that a target streamer is present, so they have to be able to construct a null streamer in order to set the target streamer in it to. Fixes a crash when using the null streamer with arm. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211358 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/TargetRegistry.h | 20 +++- lib/CodeGen/LLVMTargetMachine.cpp | 2 +- .../ARM/MCTargetDesc/ARMELFStreamer.cpp | 6 ++ .../ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 6 ++ lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 2 + .../ARM/MCTargetDesc/ARMTargetStreamer.cpp | 98 +++++-------------- test/DebugInfo/global.ll | 3 + 7 files changed, 63 insertions(+), 74 deletions(-) diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index fcdc60490caa..291fb1b95db3 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -51,6 +51,7 @@ namespace llvm { class raw_ostream; class formatted_raw_ostream; + MCStreamer *createNullStreamer(MCContext &Ctx); MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, @@ -139,6 +140,7 @@ namespace llvm { MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst); + typedef MCStreamer *(*NullStreamerCtorTy)(MCContext &Ctx); typedef MCRelocationInfo *(*MCRelocationInfoCtorTy)(StringRef TT, MCContext &Ctx); typedef MCSymbolizer *(*MCSymbolizerCtorTy)(StringRef TT, @@ -225,6 +227,10 @@ namespace llvm { /// AsmStreamer, if registered (default = llvm::createAsmStreamer). AsmStreamerCtorTy AsmStreamerCtorFn; + /// Construction function for this target's NullStreamer, if registered + /// (default = llvm::createNullStreamer). 
+ NullStreamerCtorTy NullStreamerCtorFn; + /// MCRelocationInfoCtorFn - Construction function for this target's /// MCRelocationInfo, if registered (default = llvm::createMCRelocationInfo) MCRelocationInfoCtorTy MCRelocationInfoCtorFn; @@ -235,8 +241,8 @@ namespace llvm { public: Target() - : AsmStreamerCtorFn(nullptr), MCRelocationInfoCtorFn(nullptr), - MCSymbolizerCtorFn(nullptr) {} + : AsmStreamerCtorFn(nullptr), NullStreamerCtorFn(nullptr), + MCRelocationInfoCtorFn(nullptr), MCSymbolizerCtorFn(nullptr) {} /// @name Target Information /// @{ @@ -447,6 +453,12 @@ namespace llvm { InstPrint, CE, TAB, ShowInst); } + MCStreamer *createNullStreamer(MCContext &Ctx) const { + if (NullStreamerCtorFn) + return NullStreamerCtorFn(Ctx); + return llvm::createNullStreamer(Ctx); + } + /// createMCRelocationInfo - Create a target specific MCRelocationInfo. /// /// \param TT The target triple. @@ -780,6 +792,10 @@ namespace llvm { T.AsmStreamerCtorFn = Fn; } + static void RegisterNullStreamer(Target &T, Target::NullStreamerCtorTy Fn) { + T.NullStreamerCtorFn = Fn; + } + /// RegisterMCRelocationInfo - Register an MCRelocationInfo /// implementation for the given target. /// diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 29062434f00e..df96b945a8d9 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -209,7 +209,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_Null: // The Null output is intended for use for performance analysis and testing, // not real users. 
- AsmStreamer.reset(createNullStreamer(*Context)); + AsmStreamer.reset(getTarget().createNullStreamer(*Context)); break; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index a4d13edd3ac7..66a2d59e82cf 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -1332,6 +1332,12 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, return S; } +MCStreamer *createARMNullStreamer(MCContext &Ctx) { + MCStreamer *S = llvm::createNullStreamer(Ctx); + new ARMTargetStreamer(*S); + return S; +} + MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 04d63a7e6d46..2b3855d793af 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -427,6 +427,12 @@ extern "C" void LLVMInitializeARMTargetMC() { TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer); TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer); + // Register the null streamer. + TargetRegistry::RegisterNullStreamer(TheARMLETarget, createARMNullStreamer); + TargetRegistry::RegisterNullStreamer(TheARMBETarget, createARMNullStreamer); + TargetRegistry::RegisterNullStreamer(TheThumbLETarget, createARMNullStreamer); + TargetRegistry::RegisterNullStreamer(TheThumbBETarget, createARMNullStreamer); + // Register the MCInstPrinter. 
TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter); TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 8853a8c69c50..5326e564f363 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -51,6 +51,8 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst); +MCStreamer *createARMNullStreamer(MCContext &Ctx); + MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 0cb795ba3ccc..ad3f1caae347 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -40,78 +40,34 @@ void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); } // The remaining callbacks should be handled separately by each // streamer. 
-void ARMTargetStreamer::emitFnStart() { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitFnEnd() { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitCantUnwind() { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitHandlerData() { - llvm_unreachable("unimplemented"); -} +void ARMTargetStreamer::emitFnStart() {} +void ARMTargetStreamer::emitFnEnd() {} +void ARMTargetStreamer::emitCantUnwind() {} +void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {} +void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {} +void ARMTargetStreamer::emitHandlerData() {} void ARMTargetStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, - int64_t Offset) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitPad(int64_t Offset) { - llvm_unreachable("unimplemented"); -} -void -ARMTargetStreamer::emitRegSave(const SmallVectorImpl &RegList, - bool isVector) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitUnwindRaw( - int64_t StackOffset, const SmallVectorImpl &Opcodes) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::switchVendor(StringRef Vendor) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) { - llvm_unreachable("unimplemented"); -} + int64_t Offset) {} +void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {} +void ARMTargetStreamer::emitPad(int64_t Offset) {} +void ARMTargetStreamer::emitRegSave(const SmallVectorImpl &RegList, + bool isVector) {} +void ARMTargetStreamer::emitUnwindRaw(int64_t StackOffset, + const SmallVectorImpl &Opcodes) { +} +void 
ARMTargetStreamer::switchVendor(StringRef Vendor) {} +void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {} void ARMTargetStreamer::emitTextAttribute(unsigned Attribute, - StringRef String) { - llvm_unreachable("unimplemented"); -} + StringRef String) {} void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute, - unsigned IntValue, - StringRef StringValue) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitArch(unsigned Arch) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitObjectArch(unsigned Arch) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitFPU(unsigned FPU) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::finishAttributeSection() { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::AnnotateTLSDescriptorSequence( - const MCSymbolRefExpr *SRE) { - llvm_unreachable("unimplemented"); -} + unsigned IntValue, + StringRef StringValue) {} +void ARMTargetStreamer::emitArch(unsigned Arch) {} +void ARMTargetStreamer::emitObjectArch(unsigned Arch) {} +void ARMTargetStreamer::emitFPU(unsigned FPU) {} +void ARMTargetStreamer::finishAttributeSection() {} +void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} +void +ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} -void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { - llvm_unreachable("unimplemented"); -} +void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll index 3c97f0cb2279..5475b251d64f 100644 --- a/test/DebugInfo/global.ll +++ b/test/DebugInfo/global.ll @@ -6,6 +6,9 @@ ; Also test that the null streamer doesn't crash with debug info. ; RUN: %llc_dwarf -O0 -filetype=null < %s +; Test the null streamer with a terget streamer too. 
+; RUN: llc -O0 -filetype=null -mtriple=arm-linux < %s + ; generated from the following source compiled to bitcode with clang -g -O1 ; static int i; ; int main() { From e54c32ebc6b629acf3da91a104623741bb747248 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 15:30:38 +0000 Subject: [PATCH 0087/1155] Move test so that it is skipped if the ARM target is not enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211366 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/ARM/null-streamer.ll | 7 +++++++ test/DebugInfo/global.ll | 3 --- 2 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/ARM/null-streamer.ll diff --git a/test/CodeGen/ARM/null-streamer.ll b/test/CodeGen/ARM/null-streamer.ll new file mode 100644 index 000000000000..350c45e5bebe --- /dev/null +++ b/test/CodeGen/ARM/null-streamer.ll @@ -0,0 +1,7 @@ +; Test the null streamer with a terget streamer. +; RUN: llc -O0 -filetype=null -mtriple=arm-linux < %s + +define i32 @main() { +entry: + ret i32 0 +} diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll index 5475b251d64f..3c97f0cb2279 100644 --- a/test/DebugInfo/global.ll +++ b/test/DebugInfo/global.ll @@ -6,9 +6,6 @@ ; Also test that the null streamer doesn't crash with debug info. ; RUN: %llc_dwarf -O0 -filetype=null < %s -; Test the null streamer with a terget streamer too. -; RUN: llc -O0 -filetype=null -mtriple=arm-linux < %s - ; generated from the following source compiled to bitcode with clang -g -O1 ; static int i; ; int main() { From 69e4786797780c25dbd7820b47998207d44a4d18 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 20 Jun 2014 16:34:05 +0000 Subject: [PATCH 0088/1155] [PowerPC] Fix small argument stack slot offset for LE When small arguments (structures < 8 bytes or "float") are passed in a stack slot in the ppc64 SVR4 ABI, they must reside in the least significant part of that slot. 
On BE, this means that an offset needs to be added to the stack address of the parameter, but on LE, the least significant part of the slot has the same address as the slot itself. This changes the PowerPC back-end ABI code to only add the small argument stack slot offset for BE. It also adds test cases to verify the correct behavior on both BE and LE. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211368 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 31 ++++++++----- test/CodeGen/PowerPC/ppc64-smallarg.ll | 59 ++++++++++++++++++++++++ test/CodeGen/PowerPC/ppc64le-smallarg.ll | 59 ++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 11 deletions(-) create mode 100644 test/CodeGen/PowerPC/ppc64-smallarg.ll create mode 100644 test/CodeGen/PowerPC/ppc64le-smallarg.ll diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f6884d5a2716..b20516e3a4c2 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2433,6 +2433,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // + bool isLittleEndian = Subtarget.isLittleEndian(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); @@ -2533,7 +2534,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( } // All aggregates smaller than 8 bytes must be passed right-justified. - if (ObjSize < PtrByteSize) + if (ObjSize < PtrByteSize && !isLittleEndian) CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); // The value of the object is its address. 
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); @@ -2683,9 +2684,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type. if (needsLoad) { - int FI = MFI->CreateFixedObject(ObjSize, - CurArgOffset + (ArgSize - ObjSize), - isImmutable); + if (ObjSize < ArgSize && !isLittleEndian) + CurArgOffset += ArgSize - ObjSize; + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), false, false, false, 0); @@ -4034,6 +4035,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { + bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -4177,9 +4179,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, } if (GPR_idx == NumGPRs && Size < 8) { - SDValue Const = DAG.getConstant(PtrByteSize - Size, - PtrOff.getValueType()); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + SDValue AddPtr = PtrOff; + if (!isLittleEndian) { + SDValue Const = DAG.getConstant(PtrByteSize - Size, + PtrOff.getValueType()); + AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); @@ -4214,8 +4219,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // small aggregates, particularly for packed ones. // FIXME: It would be preferable to use the slot in the // parameter save area instead of a new local variable. 
- SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + SDValue AddPtr = PtrOff; + if (!isLittleEndian) { + SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); + AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); @@ -4276,7 +4284,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // must be passed right-justified in the stack doubleword, and // in the GPR, if one is available. SDValue StoreOff; - if (Arg.getSimpleValueType().SimpleTy == MVT::f32) { + if (Arg.getSimpleValueType().SimpleTy == MVT::f32 && + !isLittleEndian) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } else @@ -4300,7 +4309,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, } else { // Single-precision floating-point values are mapped to the // second (rightmost) word of the stack doubleword. - if (Arg.getValueType() == MVT::f32) { + if (Arg.getValueType() == MVT::f32 && !isLittleEndian) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } diff --git a/test/CodeGen/PowerPC/ppc64-smallarg.ll b/test/CodeGen/PowerPC/ppc64-smallarg.ll new file mode 100644 index 000000000000..0d5b078e217a --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-smallarg.ll @@ -0,0 +1,59 @@ +; Verify that small structures and float arguments are passed in the +; least significant part of a stack slot doubleword. 
+ +; RUN: llc < %s | FileCheck %s + +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.large_arg = type { [8 x i64] } +%struct.small_arg = type { i16, i8 } + +@gl = common global %struct.large_arg zeroinitializer, align 8 +@gs = common global %struct.small_arg zeroinitializer, align 2 +@gf = common global float 0.000000e+00, align 4 + +define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) { +entry: + %0 = bitcast %struct.small_arg* %x to i32* + %1 = bitcast %struct.small_arg* %agg.result to i32* + %2 = load i32* %0, align 2 + store i32 %2, i32* %1, align 2 + ret void +} +; CHECK: @callee1 +; CHECK: lwz {{[0-9]+}}, 124(1) +; CHECK: blr + +define void @caller1() { +entry: + %tmp = alloca %struct.small_arg, align 2 + call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs) + ret void +} +; CHECK: @caller1 +; CHECK: stw {{[0-9]+}}, 124(1) +; CHECK: bl test1 + +declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval) + +define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) { +entry: + ret float %x +} +; CHECK: @callee2 +; CHECK: lfs {{[0-9]+}}, 156(1) +; CHECK: blr + +define void @caller2() { +entry: + %0 = load float* @gf, align 4 + %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0) + ret void +} +; CHECK: @caller2 +; CHECK: stfs {{[0-9]+}}, 156(1) +; CHECK: bl test2 + +declare float @test2(float, float, float, float, 
float, float, float, float, float, float, float, float, float, float) + diff --git a/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/test/CodeGen/PowerPC/ppc64le-smallarg.ll new file mode 100644 index 000000000000..fcb1e9200ddc --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64le-smallarg.ll @@ -0,0 +1,59 @@ +; Verify that small structures and float arguments are passed in the +; least significant part of a stack slot doubleword. + +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +%struct.large_arg = type { [8 x i64] } +%struct.small_arg = type { i16, i8 } + +@gl = common global %struct.large_arg zeroinitializer, align 8 +@gs = common global %struct.small_arg zeroinitializer, align 2 +@gf = common global float 0.000000e+00, align 4 + +define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) { +entry: + %0 = bitcast %struct.small_arg* %x to i32* + %1 = bitcast %struct.small_arg* %agg.result to i32* + %2 = load i32* %0, align 2 + store i32 %2, i32* %1, align 2 + ret void +} +; CHECK: @callee1 +; CHECK: lwz {{[0-9]+}}, 120(1) +; CHECK: blr + +define void @caller1() { +entry: + %tmp = alloca %struct.small_arg, align 2 + call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs) + ret void +} +; CHECK: @caller1 +; CHECK: stw {{[0-9]+}}, 120(1) +; CHECK: bl test1 + +declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval) + +define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) { +entry: + ret float %x +} +; CHECK: @callee2 +; CHECK: lfs {{[0-9]+}}, 152(1) +; CHECK: blr + +define void @caller2() { +entry: + %0 = load float* @gf, align 4 + %call = tail 
call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0) + ret void +} +; CHECK: @caller2 +; CHECK: stfs {{[0-9]+}}, 152(1) +; CHECK: bl test2 + +declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) + From 538c95179c6496cef56e28623a912c397bfe4b29 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 20 Jun 2014 17:05:57 +0000 Subject: [PATCH 0089/1155] R600/SI: Add a VALU pattern for i64 xor git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211373 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 11 +++++++---- test/CodeGen/R600/xor.ll | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 26024dc4f2df..ba1fae2e5772 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1773,15 +1773,18 @@ def : RsqPat; // VOP2 Patterns //===----------------------------------------------------------------------===// -def : Pat < - (or i64:$src0, i64:$src1), +class BinOp64Pat : Pat < + (node i64:$src0, i64:$src1), (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0), + (inst (EXTRACT_SUBREG i64:$src0, sub0), (EXTRACT_SUBREG i64:$src1, sub0)), sub0), - (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1), + (inst (EXTRACT_SUBREG i64:$src0, sub1), (EXTRACT_SUBREG i64:$src1, sub1)), sub1) >; +def : BinOp64Pat ; +def : BinOp64Pat ; + class SextInReg : Pat < (sext_inreg i32:$src0, vt), (V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0)) diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll index 004304173990..ab618cf40c3f 100644 --- 
a/test/CodeGen/R600/xor.ll +++ b/test/CodeGen/R600/xor.ll @@ -130,3 +130,29 @@ define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 store i64 %result, i64 addrspace(1)* %out ret void } + +; Test that we have a pattern to match xor inside a branch. +; Note that in the future the backend may be smart enough to +; use an SALU instruction for this. + +; SI-CHECK-LABEL: @xor_cf +; SI-CHECK: V_XOR +; SI-CHECK: V_XOR +define void @xor_cf(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b) { +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = xor i64 %a, %b + br label %endif + +else: + %2 = load i64 addrspace(1)* %in + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} From 98b0736565214ec13b9b1bd329d041cbf6e18f9c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 20 Jun 2014 17:06:02 +0000 Subject: [PATCH 0090/1155] R600/SI: SI Control Flow Annotation bug fixed Mixing of AddAvailableValue and GetValueAtEndOfBlock methods of SSAUpdater leaded to the endless loop generation when the nested loops annotated. This fixes a bug in the OCL_ML/KNN OpenCV test. The test case is too complex for FileCheck and would be very fragile. 
Patch by: Elena Denisova git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211374 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIAnnotateControlFlow.cpp | 46 +++++++++++------------ 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index d6e445136faf..91eb60beb6bb 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -65,7 +65,6 @@ class SIAnnotateControlFlow : public FunctionPass { DominatorTree *DT; StackVector Stack; - SSAUpdater PhiInserter; bool isTopOfStack(BasicBlock *BB); @@ -81,7 +80,7 @@ class SIAnnotateControlFlow : public FunctionPass { void insertElse(BranchInst *Term); - void handleLoopCondition(Value *Cond); + Value *handleLoopCondition(Value *Cond, PHINode *Broken); void handleLoop(BranchInst *Term); @@ -177,7 +176,7 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) { } else { if (Phi->getIncomingValue(i) != BoolFalse) return false; - + } } return true; @@ -204,20 +203,26 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) { } /// \brief Recursively handle the condition leading to a loop -void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) { +Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) { if (PHINode *Phi = dyn_cast(Cond)) { + BasicBlock *Parent = Phi->getParent(); + PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front()); + Value *Ret = NewPhi; // Handle all non-constant incoming values first for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { Value *Incoming = Phi->getIncomingValue(i); - if (isa(Incoming)) + BasicBlock *From = Phi->getIncomingBlock(i); + if (isa(Incoming)) { + NewPhi->addIncoming(Broken, From); continue; + } Phi->setIncomingValue(i, BoolFalse); - handleLoopCondition(Incoming); + Value *PhiArg = handleLoopCondition(Incoming, Broken); + NewPhi->addIncoming(PhiArg, From); } - BasicBlock 
*Parent = Phi->getParent(); BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock(); for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { @@ -230,33 +235,28 @@ void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) { if (From == IDom) { CallInst *OldEnd = dyn_cast(Parent->getFirstInsertionPt()); if (OldEnd && OldEnd->getCalledFunction() == EndCf) { - Value *Args[] = { - OldEnd->getArgOperand(0), - PhiInserter.GetValueAtEndOfBlock(Parent) - }; - Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd); - PhiInserter.AddAvailableValue(Parent, Ret); + Value *Args[] = { OldEnd->getArgOperand(0), NewPhi }; + Ret = CallInst::Create(ElseBreak, Args, "", OldEnd); continue; } } - TerminatorInst *Insert = From->getTerminator(); - Value *Arg = PhiInserter.GetValueAtEndOfBlock(From); - Value *Ret = CallInst::Create(Break, Arg, "", Insert); - PhiInserter.AddAvailableValue(From, Ret); + Value *PhiArg = CallInst::Create(Break, Broken, "", Insert); + NewPhi->setIncomingValue(i, PhiArg); } eraseIfUnused(Phi); + return Ret; } else if (Instruction *Inst = dyn_cast(Cond)) { BasicBlock *Parent = Inst->getParent(); TerminatorInst *Insert = Parent->getTerminator(); - Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) }; - Value *Ret = CallInst::Create(IfBreak, Args, "", Insert); - PhiInserter.AddAvailableValue(Parent, Ret); + Value *Args[] = { Cond, Broken }; + return CallInst::Create(IfBreak, Args, "", Insert); } else { llvm_unreachable("Unhandled loop condition!"); } + return 0; } /// \brief Handle a back edge (loop) @@ -264,15 +264,11 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { BasicBlock *Target = Term->getSuccessor(1); PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front()); - PhiInserter.Initialize(Int64, ""); - PhiInserter.AddAvailableValue(Target, Broken); - Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); - handleLoopCondition(Cond); + Value *Arg = handleLoopCondition(Cond, Broken); 
BasicBlock *BB = Term->getParent(); - Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB); for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target); PI != PE; ++PI) { From 2d245e2da458a8088f46136df93b918282833994 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 20 Jun 2014 17:06:05 +0000 Subject: [PATCH 0091/1155] R600: Expand vector fexp2 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211375 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + test/CodeGen/R600/input-mods.ll | 26 +++++++ test/CodeGen/R600/llvm.exp2.ll | 93 ++++++++++++++++++++------ 3 files changed, 100 insertions(+), 20 deletions(-) create mode 100644 test/CodeGen/R600/input-mods.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 1aa92fadbeea..ee6eb5bd32ec 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -320,6 +320,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); diff --git a/test/CodeGen/R600/input-mods.ll b/test/CodeGen/R600/input-mods.ll new file mode 100644 index 000000000000..13bfbab85695 --- /dev/null +++ b/test/CodeGen/R600/input-mods.ll @@ -0,0 +1,26 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK + +;EG-CHECK-LABEL: @test +;EG-CHECK: EXP_IEEE * +;CM-CHECK-LABEL: @test +;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X| +;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X| +;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X| +;CM-CHECK: EXP_IEEE * T{{[0-9]+}}.W (MASKED), 
-|T{{[0-9]+}}.X| + +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = call float @llvm.fabs.f32(float %r0) + %r2 = fsub float -0.000000e+00, %r1 + %r3 = call float @llvm.exp2.f32(float %r2) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) + ret void +} + +declare float @llvm.exp2.f32(float) readnone +declare float @llvm.fabs.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.exp2.ll b/test/CodeGen/R600/llvm.exp2.ll index 13bfbab85695..119d5ef49a5e 100644 --- a/test/CodeGen/R600/llvm.exp2.ll +++ b/test/CodeGen/R600/llvm.exp2.ll @@ -1,26 +1,79 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC -;EG-CHECK-LABEL: @test -;EG-CHECK: EXP_IEEE * -;CM-CHECK-LABEL: @test -;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X| -;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X| -;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X| -;CM-CHECK: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X| +;FUNC-LABEL: @test +;EG-CHECK: EXP_IEEE +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_EXP_F32 -define void @test(<4 x float> inreg %reg0) #0 { - %r0 = extractelement <4 x float> %reg0, i32 0 - %r1 = call float @llvm.fabs.f32(float %r0) - %r2 = fsub float 
-0.000000e+00, %r1 - %r3 = call float @llvm.exp2.f32(float %r2) - %vec = insertelement <4 x float> undef, float %r3, i32 0 - call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) +define void @test(float addrspace(1)* %out, float %in) { +entry: + %0 = call float @llvm.exp2.f32(float %in) + store float %0, float addrspace(1)* %out ret void } -declare float @llvm.exp2.f32(float) readnone -declare float @llvm.fabs.f32(float) readnone -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) +;FUNC-LABEL: @testv2 +;EG-CHECK: EXP_IEEE +;EG-CHECK: EXP_IEEE +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_EXP_F32 +;SI-CHECK: V_EXP_F32 + +define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} -attributes #0 = { "ShaderType"="0" } +;FUNC-LABEL: @testv4 +;EG-CHECK: EXP_IEEE +;EG-CHECK: EXP_IEEE +;EG-CHECK: EXP_IEEE +;EG-CHECK: EXP_IEEE +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. 
+;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_EXP_F32 +;SI-CHECK: V_EXP_F32 +;SI-CHECK: V_EXP_F32 +;SI-CHECK: V_EXP_F32 +define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +declare float @llvm.exp2.f32(float) readnone +declare <2 x float> @llvm.exp2.v2f32(<2 x float>) readnone +declare <4 x float> @llvm.exp2.v4f32(<4 x float>) readnone From 2cda6e8ca62c2ed33532b8af022db082d5270830 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 20 Jun 2014 17:06:07 +0000 Subject: [PATCH 0092/1155] R600: Expand vector flog2 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211376 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + test/CodeGen/R600/llvm.log2.ll | 79 ++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 test/CodeGen/R600/llvm.log2.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index ee6eb5bd32ec..d62f8b412a25 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ 
-321,6 +321,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); diff --git a/test/CodeGen/R600/llvm.log2.ll b/test/CodeGen/R600/llvm.log2.ll new file mode 100644 index 000000000000..4cba2d44a5c3 --- /dev/null +++ b/test/CodeGen/R600/llvm.log2.ll @@ -0,0 +1,79 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC + +;FUNC-LABEL: @test +;EG-CHECK: LOG_IEEE +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_LOG_F32 + +define void @test(float addrspace(1)* %out, float %in) { +entry: + %0 = call float @llvm.log2.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +;FUNC-LABEL: @testv2 +;EG-CHECK: LOG_IEEE +;EG-CHECK: LOG_IEEE +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. 
+;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_LOG_F32 +;SI-CHECK: V_LOG_F32 + +define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.log2.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +;FUNC-LABEL: @testv4 +;EG-CHECK: LOG_IEEE +;EG-CHECK: LOG_IEEE +;EG-CHECK: LOG_IEEE +;EG-CHECK: LOG_IEEE +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_LOG_F32 +;SI-CHECK: V_LOG_F32 +;SI-CHECK: V_LOG_F32 +;SI-CHECK: V_LOG_F32 +define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + 
ret void +} + +declare float @llvm.log2.f32(float) readnone +declare <2 x float> @llvm.log2.v2f32(<2 x float>) readnone +declare <4 x float> @llvm.log2.v4f32(<4 x float>) readnone From 61d64acd0c784594e3b109f606dc981d7735156d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 20 Jun 2014 17:06:09 +0000 Subject: [PATCH 0093/1155] R600/SI: Add a pattern for f32 ftrunc git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211377 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 2 + lib/Target/R600/R600Instructions.td | 5 +- lib/Target/R600/SIInstructions.td | 2 +- test/CodeGen/R600/ftrunc.ll | 191 +++++++++++++------------ 4 files changed, 104 insertions(+), 96 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index d62f8b412a25..f248753bdb39 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -938,6 +938,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name. 
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_trunc: + return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1)); } } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 47b7da0955d8..616b3b533b59 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -721,14 +721,11 @@ def SETNE_DX10 : R600_2OP < >; def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; -def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>; +def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>; def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>; def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; -// Add also ftrunc intrinsic pattern -def : Pat<(ftrunc f32:$src0), (TRUNC $src0)>; - def MOV : R600_1OP <0x19, "MOV", []>; let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index ba1fae2e5772..d489f51086e1 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1098,7 +1098,7 @@ defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", [(set f32:$dst, (AMDGPUfract f32:$src0))] >; defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", - [(set f32:$dst, (int_AMDGPU_trunc f32:$src0))] + [(set f32:$dst, (ftrunc f32:$src0))] >; defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", [(set f32:$dst, (fceil f32:$src0))] diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll index 3cd1deb921fc..0d7d4679fe3d 100644 --- a/test/CodeGen/R600/ftrunc.ll +++ b/test/CodeGen/R600/ftrunc.ll @@ -1,110 +1,119 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s +; RUN: llc 
-march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s -declare double @llvm.trunc.f64(double) nounwind readnone -declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone -declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone -declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone -declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone -declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone +declare float @llvm.trunc.f32(float) nounwind readnone +declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone +declare <3 x float> @llvm.trunc.v3f32(<3 x float>) nounwind readnone +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone +declare <8 x float> @llvm.trunc.v8f32(<8 x float>) nounwind readnone +declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone -; FUNC-LABEL: @v_ftrunc_f64: -; CI: V_TRUNC_F64_e32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11 -; SI: S_ENDPGM -define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { - %x = load double addrspace(1)* %in, align 8 - %y = call double @llvm.trunc.f64(double %x) nounwind readnone - store double %y, double addrspace(1)* %out, align 8 +; FUNC-LABEL: @ftrunc_f32: +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +define void @ftrunc_f32(float addrspace(1)* %out, float %x) { + %y = call float @llvm.trunc.f32(float %x) nounwind readnone + store float %y, float addrspace(1)* %out ret void } -; FUNC-LABEL: @ftrunc_f64: -; CI: V_TRUNC_F64_e32 - -; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 -; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 -; SI: S_LSHR_B64 -; SI: S_NOT_B64 -; SI: S_AND_B64 -; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI: CMP_LT_I32 -; SI: CNDMASK_B32 -; SI: CNDMASK_B32 -; SI: CMP_GT_I32 -; SI: CNDMASK_B32 -; SI: CNDMASK_B32 -; SI: S_ENDPGM -define void @ftrunc_f64(double addrspace(1)* %out, double %x) { - %y = call double 
@llvm.trunc.f64(double %x) nounwind readnone - store double %y, double addrspace(1)* %out - ret void -} - -; FUNC-LABEL: @ftrunc_v2f64: -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { - %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone - store <2 x double> %y, <2 x double> addrspace(1)* %out +; FUNC-LABEL: @ftrunc_v2f32: +; EG: TRUNC +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) { + %y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) nounwind readnone + store <2 x float> %y, <2 x float> addrspace(1)* %out ret void } -; FIXME-FUNC-LABEL: @ftrunc_v3f64: -; FIXME-CI: V_TRUNC_F64_e32 -; FIXME-CI: V_TRUNC_F64_e32 -; FIXME-CI: V_TRUNC_F64_e32 -; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { -; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone -; store <3 x double> %y, <3 x double> addrspace(1)* %out +; FIXME-FUNC-LABEL: @ftrunc_v3f32: +; FIXME-EG: TRUNC +; FIXME-EG: TRUNC +; FIXME-EG: TRUNC +; FIXME-SI: V_TRUNC_F32_e32 +; FIXME-SI: V_TRUNC_F32_e32 +; FIXME-SI: V_TRUNC_F32_e32 +; define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) { +; %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) nounwind readnone +; store <3 x float> %y, <3 x float> addrspace(1)* %out ; ret void ; } -; FUNC-LABEL: @ftrunc_v4f64: -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { - %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone - store <4 x double> %y, <4 x double> addrspace(1)* %out +; FUNC-LABEL: @ftrunc_v4f32: +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +define void 
@ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) { + %y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone + store <4 x float> %y, <4 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: @ftrunc_v8f64: -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { - %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone - store <8 x double> %y, <8 x double> addrspace(1)* %out +; FUNC-LABEL: @ftrunc_v8f32: +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) { + %y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) nounwind readnone + store <8 x float> %y, <8 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: @ftrunc_v16f64: -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { - %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone - store <16 x double> %y, <16 x double> addrspace(1)* %out +; FUNC-LABEL: @ftrunc_v16f32: +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: 
TRUNC +; EG: TRUNC +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) { + %y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) nounwind readnone + store <16 x float> %y, <16 x float> addrspace(1)* %out ret void } From c0bf939e80f58f5fdfff4f2cdf73721c1c390005 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 20 Jun 2014 17:06:11 +0000 Subject: [PATCH 0094/1155] R600/SI: Add patterns for ctpop inside a branch git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211378 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 50 +++++++++++++++++++++++-------- test/CodeGen/R600/ctpop.ll | 30 +++++++++++++++++++ test/CodeGen/R600/ctpop64.ll | 31 +++++++++++++++++++ 3 files changed, 99 insertions(+), 12 deletions(-) diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index d489f51086e1..f888b8e6e052 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1742,6 +1742,24 @@ def : Pat < (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) >; +} // Predicates = [isSI] in { + +//===----------------------------------------------------------------------===// +// SOP1 Patterns +//===----------------------------------------------------------------------===// + +let Predicates = [isSI, isCFDepth0] in { + +def : Pat < + (i64 (ctpop i64:$src)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_BCNT1_I32_B64 $src), sub0), + (S_MOV_B32 0), sub1) +>; + +} // Predicates = [isSI, isCFDepth0] + +let Predicates = [isSI] in { 
//===----------------------------------------------------------------------===// // SOP2 Patterns //===----------------------------------------------------------------------===// @@ -1793,6 +1811,26 @@ class SextInReg : Pat < def : SextInReg ; def : SextInReg ; +def : Pat < + (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)), + (V_BCNT_U32_B32_e32 $popcnt, $val) +>; + +def : Pat < + (i32 (ctpop i32:$popcnt)), + (V_BCNT_U32_B32_e64 $popcnt, 0, 0, 0) +>; + +def : Pat < + (i64 (ctpop i64:$src)), + (INSERT_SUBREG + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (V_BCNT_U32_B32_e32 (EXTRACT_SUBREG $src, sub1), + (V_BCNT_U32_B32_e64 (EXTRACT_SUBREG $src, sub0), 0, 0, 0)), + sub0), + (V_MOV_B32_e32 0), sub1) +>; + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ @@ -2786,18 +2824,6 @@ def : Pat < (S_ADD_I32 $src0, $src1) >; -def : Pat < - (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)), - (V_BCNT_U32_B32_e32 $popcnt, $val) ->; - -def : Pat < - (i64 (ctpop i64:$src)), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (S_BCNT1_I32_B64 $src), sub0), - (S_MOV_B32 0), sub1) ->; - //============================================================================// // Miscellaneous Optimization Patterns //============================================================================// diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll index e4d11e003696..15be8e162f3f 100644 --- a/test/CodeGen/R600/ctpop.ll +++ b/test/CodeGen/R600/ctpop.ll @@ -252,3 +252,33 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp store i32 %add, i32 addrspace(1)* %out, align 4 ret void } + +; FIXME: We currently disallow SALU instructions in all branches, +; but there are some cases when the should be allowed. 
+ +; FUNC-LABEL: @ctpop_i32_in_br +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +; EG: BCNT_INT +define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) { +entry: + %0 = icmp eq i32 %cond, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i32 addrspace(1)* %in + %2 = call i32 @llvm.ctpop.i32(i32 %1) + br label %endif + +else: + %3 = getelementptr i32 addrspace(1)* %in, i32 1 + %4 = load i32 addrspace(1)* %3 + br label %endif + +endif: + %5 = phi i32 [%2, %if], [%4, %else] + store i32 %5, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll index 798d8f55ecc1..b36ecc68d895 100644 --- a/test/CodeGen/R600/ctpop64.ll +++ b/test/CodeGen/R600/ctpop64.ll @@ -89,3 +89,34 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16 ret void } + +; FIXME: We currently disallow SALU instructions in all branches, +; but there are some cases when the should be allowed. 
+ +; FUNC-LABEL: @ctpop_i64_in_br +; SI: V_BCNT_U32_B32_e64 [[BCNT_LO:v[0-9]+]], v{{[0-9]+}}, 0 +; SI: V_BCNT_U32_B32_e32 v[[BCNT:[0-9]+]], v{{[0-9]+}}, [[BCNT_LO]] +; SI: V_MOV_B32_e32 v[[ZERO:[0-9]+]], 0 +; SI: BUFFER_STORE_DWORDX2 v[ +; SI: [[BCNT]]:[[ZERO]]] +; SI: S_ENDPGM +define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i32 %cond) { +entry: + %0 = icmp eq i32 %cond, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64 addrspace(1)* %in + %2 = call i64 @llvm.ctpop.i64(i64 %1) + br label %endif + +else: + %3 = getelementptr i64 addrspace(1)* %in, i32 1 + %4 = load i64 addrspace(1)* %3 + br label %endif + +endif: + %5 = phi i64 [%2, %if], [%4, %else] + store i64 %5, i64 addrspace(1)* %out + ret void +} From 2a069ac1f392699388772f69a6528963c29a6bf6 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 20 Jun 2014 17:51:47 +0000 Subject: [PATCH 0095/1155] [RuntimeDyld] Support more PPC64 relocations This adds support for several missing PPC64 relocations in the straight-forward manner to RuntimeDyldELF.cpp. Note that this actually fixes a failure of a large-model test case on PowerPC, allowing the XFAIL to be removed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211382 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../RuntimeDyld/RuntimeDyldELF.cpp | 74 ++++++++++++++++++- test/ExecutionEngine/MCJIT/eh-lg-pic.ll | 2 +- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 56471f43b2e2..f6348be746d4 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -702,24 +702,37 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj, llvm_unreachable("Attempting to get address of ODP entry!"); } -// Relocation masks following the #lo(value), #hi(value), #higher(value), -// and #highest(value) macros defined in section 4.5.1. 
Relocation Types -// in PPC-elf64abi document. -// +// Relocation masks following the #lo(value), #hi(value), #ha(value), +// #higher(value), #highera(value), #highest(value), and #highesta(value) +// macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi +// document. + static inline uint16_t applyPPClo(uint64_t value) { return value & 0xffff; } static inline uint16_t applyPPChi(uint64_t value) { return (value >> 16) & 0xffff; } +static inline uint16_t applyPPCha (uint64_t value) { + return ((value + 0x8000) >> 16) & 0xffff; +} + static inline uint16_t applyPPChigher(uint64_t value) { return (value >> 32) & 0xffff; } +static inline uint16_t applyPPChighera (uint64_t value) { + return ((value + 0x8000) >> 32) & 0xffff; +} + static inline uint16_t applyPPChighest(uint64_t value) { return (value >> 48) & 0xffff; } +static inline uint16_t applyPPChighesta (uint64_t value) { + return ((value + 0x8000) >> 48) & 0xffff; +} + void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { @@ -728,24 +741,57 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, default: llvm_unreachable("Relocation type not implemented yet!"); break; + case ELF::R_PPC64_ADDR16: + writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); + break; + case ELF::R_PPC64_ADDR16_DS: + writeInt16BE(LocalAddress, applyPPClo(Value + Addend) & ~3); + break; case ELF::R_PPC64_ADDR16_LO: writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); break; + case ELF::R_PPC64_ADDR16_LO_DS: + writeInt16BE(LocalAddress, applyPPClo(Value + Addend) & ~3); + break; case ELF::R_PPC64_ADDR16_HI: writeInt16BE(LocalAddress, applyPPChi(Value + Addend)); break; + case ELF::R_PPC64_ADDR16_HA: + writeInt16BE(LocalAddress, applyPPCha(Value + Addend)); + break; case ELF::R_PPC64_ADDR16_HIGHER: writeInt16BE(LocalAddress, applyPPChigher(Value + Addend)); break; + case ELF::R_PPC64_ADDR16_HIGHERA: + 
writeInt16BE(LocalAddress, applyPPChighera(Value + Addend)); + break; case ELF::R_PPC64_ADDR16_HIGHEST: writeInt16BE(LocalAddress, applyPPChighest(Value + Addend)); break; + case ELF::R_PPC64_ADDR16_HIGHESTA: + writeInt16BE(LocalAddress, applyPPChighesta(Value + Addend)); + break; case ELF::R_PPC64_ADDR14: { assert(((Value + Addend) & 3) == 0); // Preserve the AA/LK bits in the branch instruction uint8_t aalk = *(LocalAddress + 3); writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc)); } break; + case ELF::R_PPC64_REL16_LO: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t Delta = Value - FinalAddress + Addend; + writeInt16BE(LocalAddress, applyPPClo(Delta)); + } break; + case ELF::R_PPC64_REL16_HI: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t Delta = Value - FinalAddress + Addend; + writeInt16BE(LocalAddress, applyPPChi(Delta)); + } break; + case ELF::R_PPC64_REL16_HA: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t Delta = Value - FinalAddress + Addend; + writeInt16BE(LocalAddress, applyPPCha(Delta)); + } break; case ELF::R_PPC64_ADDR32: { int32_t Result = static_cast(Value + Addend); if (SignExtend32<32>(Result) != Result) @@ -784,10 +830,30 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt16BE(LocalAddress, applyPPClo(Value)); } break; case ELF::R_PPC64_TOC16_DS: { + uint64_t TOCStart = findPPC64TOC(); + Value = ((Value + Addend) - TOCStart); + writeInt16BE(LocalAddress, applyPPClo(Value) & ~3); + } break; + case ELF::R_PPC64_TOC16_LO: { uint64_t TOCStart = findPPC64TOC(); Value = ((Value + Addend) - TOCStart); writeInt16BE(LocalAddress, applyPPClo(Value)); } break; + case ELF::R_PPC64_TOC16_LO_DS: { + uint64_t TOCStart = findPPC64TOC(); + Value = ((Value + Addend) - TOCStart); + writeInt16BE(LocalAddress, applyPPClo(Value) & ~3); + } break; + case ELF::R_PPC64_TOC16_HI: { + uint64_t TOCStart = findPPC64TOC(); + Value = ((Value + Addend) - 
TOCStart); + writeInt16BE(LocalAddress, applyPPChi(Value)); + } break; + case ELF::R_PPC64_TOC16_HA: { + uint64_t TOCStart = findPPC64TOC(); + Value = ((Value + Addend) - TOCStart); + writeInt16BE(LocalAddress, applyPPCha(Value)); + } break; } } diff --git a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll index 7c0227d74ed6..539c890f9186 100644 --- a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll +++ b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll @@ -1,5 +1,5 @@ ; RUN: %lli_mcjit -relocation-model=pic -code-model=large %s -; XFAIL: cygwin, win32, mingw, mips, powerpc64, i686, i386, aarch64, arm +; XFAIL: cygwin, win32, mingw, mips, i686, i386, aarch64, arm declare i8* @__cxa_allocate_exception(i64) declare void @__cxa_throw(i8*, i8*, i8*) declare i32 @__gxx_personality_v0(...) From 602941c737718bf875934f5121615eaf2596f196 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Fri, 20 Jun 2014 18:07:34 +0000 Subject: [PATCH 0096/1155] Fix a warning about the use of const being ignored with a cast. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211383 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 1e105d3973b7..a072b0f28ca1 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1384,7 +1384,7 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, LibrariesShortNames.push_back(StringRef()); continue; } - char *P = (char *)(Libraries[i]) + D.dylib.name; + const char *P = (const char *)(Libraries[i]) + D.dylib.name; StringRef Name = StringRef(P); StringRef Suffix; bool isFramework; From e29fc75ea56f08887fb8f05c9b991ff1a0f6c6dc Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 20 Jun 2014 18:17:56 +0000 Subject: [PATCH 0097/1155] [RuntimeDyld] Fix ppc64 stub relocations on little-endian When RuntimeDyldELF creates stub functions, it needs to install relocations that will resolve to the final address of the target routine. Since those are 16-bit relocs, they need to be applied to the least-significant halfword of the instruction. On big-endian ppc64, this means that addresses have to be adjusted by 2, which is what the code currently does. However, on a little-endian system, the address must *not* be adjusted; the least-significant halfword is the first one. This patch updates the RuntimeDyldELF code to take the target byte order into account. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211384 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../RuntimeDyld/RuntimeDyldELF.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index f6348be746d4..03e0a36ab90e 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1205,14 +1205,20 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( ELF::R_PPC64_ADDR64, Value.Addend); // Generates the 64-bits address loads as exemplified in section - // 4.5.1 in PPC64 ELF ABI. - RelocationEntry REhst(SectionID, StubTargetAddr - Section.Address + 2, + // 4.5.1 in PPC64 ELF ABI. Note that the relocations need to + // apply to the low part of the instructions, so we have to update + // the offset according to the target endianness. + uint64_t StubRelocOffset = StubTargetAddr - Section.Address; + if (!IsTargetLittleEndian) + StubRelocOffset += 2; + + RelocationEntry REhst(SectionID, StubRelocOffset + 0, ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend); - RelocationEntry REhr(SectionID, StubTargetAddr - Section.Address + 6, + RelocationEntry REhr(SectionID, StubRelocOffset + 4, ELF::R_PPC64_ADDR16_HIGHER, Value.Addend); - RelocationEntry REh(SectionID, StubTargetAddr - Section.Address + 14, + RelocationEntry REh(SectionID, StubRelocOffset + 12, ELF::R_PPC64_ADDR16_HI, Value.Addend); - RelocationEntry REl(SectionID, StubTargetAddr - Section.Address + 18, + RelocationEntry REl(SectionID, StubRelocOffset + 16, ELF::R_PPC64_ADDR16_LO, Value.Addend); if (Value.SymbolName) { From 8d99e7650fc9e590544c543724af432bc57de27e Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Fri, 20 Jun 2014 19:00:41 +0000 Subject: [PATCH 0098/1155] [Make] Fix dependencies for td.expanded Depend on all the .td files not just the main one. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211390 91177308-0d34-0410-b5e6-96231b3b80d8 --- Makefile.rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rules b/Makefile.rules index ff0a3e3f8191..ebebc0a85c4f 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -1725,7 +1725,7 @@ $(ObjDir)/%GenDFAPacketizer.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN) # Dump all the records to .td.expanded. This is useful for debugging. $(TARGET:%=%.td.expanded): \ -%.td.expanded : %.td $(LLVM_TBLGEN) +%.td.expanded : %.td $(LLVM_TBLGEN) $(TDFiles) $(Echo) "Building a fully expanded version of $( Date: Fri, 20 Jun 2014 19:11:56 +0000 Subject: [PATCH 0099/1155] =?UTF-8?q?Commited=20patch=20from=20Bj=C3=B6rn?= =?UTF-8?q?=20Steinbrink:=20Summary:=20Different=20range=20metadata=20can?= =?UTF-8?q?=20lead=20to=20different=20optimizations=20in=20later=20passes,?= =?UTF-8?q?=20possibly=20breaking=20the=20semantics=20of=20the=20merged=20?= =?UTF-8?q?function.=20So=20range=20metadata=20must=20be=20taken=20into=20?= =?UTF-8?q?consideration=20when=20comparing=20Load=20instructions.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211391 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 7 ++++- test/Transforms/MergeFunc/ranges.ll | 43 +++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/MergeFunc/ranges.ll diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index c9b4af769136..c66df600700b 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -325,6 +325,7 @@ class FunctionComparator { /// 6.1.Load: volatile (as boolean flag) /// 6.2.Load: alignment (as integer numbers) /// 6.3.Load: synch-scope (as integer numbers) + /// 6.4.Load: range metadata (as integer numbers) /// On this stage its better to see the code, since its not more than 10-15 /// strings for particular instruction, and could change sometimes. int cmpOperation(const Instruction *L, const Instruction *R) const; @@ -788,7 +789,11 @@ int FunctionComparator::cmpOperation(const Instruction *L, if (int Res = cmpNumbers(LI->getOrdering(), cast(R)->getOrdering())) return Res; - return cmpNumbers(LI->getSynchScope(), cast(R)->getSynchScope()); + if (int Res = + cmpNumbers(LI->getSynchScope(), cast(R)->getSynchScope())) + return Res; + return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range), + (uint64_t)cast(R)->getMetadata(LLVMContext::MD_range)); } if (const StoreInst *SI = dyn_cast(L)) { if (int Res = diff --git a/test/Transforms/MergeFunc/ranges.ll b/test/Transforms/MergeFunc/ranges.ll new file mode 100644 index 000000000000..e25ff1d3acb1 --- /dev/null +++ b/test/Transforms/MergeFunc/ranges.ll @@ -0,0 +1,43 @@ +; RUN: opt -mergefunc -S < %s | FileCheck %s +define i1 @cmp_with_range(i8*, i8*) { + %v1 = load i8* %0, !range !0 + %v2 = load i8* %1, !range !0 + %out = icmp eq i8 %v1, %v2 + ret i1 %out +} + +define i1 @cmp_no_range(i8*, i8*) { +; CHECK-LABEL: @cmp_no_range +; CHECK-NEXT %v1 = load i8* 
%0 +; CHECK-NEXT %v2 = load i8* %1 +; CHECK-NEXT %out = icmp eq i8 %v1, %v2 +; CHECK-NEXT ret i1 %out + %v1 = load i8* %0 + %v2 = load i8* %1 + %out = icmp eq i8 %v1, %v2 + ret i1 %out +} + +define i1 @cmp_different_range(i8*, i8*) { +; CHECK-LABEL: @cmp_different_range +; CHECK-NEXT: %v1 = load i8* %0, !range !1 +; CHECK-NEXT: %v2 = load i8* %1, !range !1 +; CHECK-NEXT: %out = icmp eq i8 %v1, %v2 +; CHECK-NEXT: ret i1 %out + %v1 = load i8* %0, !range !1 + %v2 = load i8* %1, !range !1 + %out = icmp eq i8 %v1, %v2 + ret i1 %out +} + +define i1 @cmp_with_same_range(i8*, i8*) { +; CHECK-LABEL: @cmp_with_same_range +; CHECK: tail call i1 @cmp_with_range + %v1 = load i8* %0, !range !0 + %v2 = load i8* %1, !range !0 + %out = icmp eq i8 %v1, %v2 + ret i1 %out +} + +!0 = metadata !{i8 0, i8 2} +!1 = metadata !{i8 5, i8 7} From 20dead8d0ce64f70e1dcf558f69b6d725ba18d9d Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 20 Jun 2014 19:54:13 +0000 Subject: [PATCH 0100/1155] Fix some -Wsign-compare fallout from changing container count member functions to return unsigned instead of bool. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211393 91177308-0d34-0410-b5e6-96231b3b80d8 --- unittests/ADT/DenseMapTest.cpp | 2 +- unittests/ADT/DenseSetTest.cpp | 2 +- unittests/IR/ValueMapTest.cpp | 20 ++++++++++---------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/unittests/ADT/DenseMapTest.cpp b/unittests/ADT/DenseMapTest.cpp index bb7797cf149d..75a910a24cab 100644 --- a/unittests/ADT/DenseMapTest.cpp +++ b/unittests/ADT/DenseMapTest.cpp @@ -345,7 +345,7 @@ TEST(DenseMapCustomTest, FindAsTest) { EXPECT_EQ(3u, map.size()); // Normal lookup tests - EXPECT_EQ(1, map.count(1)); + EXPECT_EQ(1u, map.count(1)); EXPECT_EQ(1u, map.find(0)->second); EXPECT_EQ(2u, map.find(1)->second); EXPECT_EQ(3u, map.find(2)->second); diff --git a/unittests/ADT/DenseSetTest.cpp b/unittests/ADT/DenseSetTest.cpp index ada5f6db83af..154c5892d5fd 100644 --- a/unittests/ADT/DenseSetTest.cpp +++ b/unittests/ADT/DenseSetTest.cpp @@ -24,7 +24,7 @@ TEST_F(DenseSetTest, DoubleEntrySetTest) { set.insert(0); set.insert(1); // Original failure was an infinite loop in this call: - EXPECT_EQ(0, set.count(2)); + EXPECT_EQ(0u, set.count(2)); } } diff --git a/unittests/IR/ValueMapTest.cpp b/unittests/IR/ValueMapTest.cpp index 51acd2fc641a..ef8258882a64 100644 --- a/unittests/IR/ValueMapTest.cpp +++ b/unittests/IR/ValueMapTest.cpp @@ -48,13 +48,13 @@ TYPED_TEST(ValueMapTest, FollowsValue) { ValueMap VM; VM[this->BitcastV.get()] = 7; EXPECT_EQ(7, VM.lookup(this->BitcastV.get())); - EXPECT_EQ(0, VM.count(this->AddV.get())); + EXPECT_EQ(0u, VM.count(this->AddV.get())); this->BitcastV->replaceAllUsesWith(this->AddV.get()); EXPECT_EQ(7, VM.lookup(this->AddV.get())); - EXPECT_EQ(0, VM.count(this->BitcastV.get())); + EXPECT_EQ(0u, VM.count(this->BitcastV.get())); this->AddV.reset(); - EXPECT_EQ(0, VM.count(this->AddV.get())); - EXPECT_EQ(0, VM.count(this->BitcastV.get())); + EXPECT_EQ(0u, VM.count(this->AddV.get())); + EXPECT_EQ(0u, VM.count(this->BitcastV.get())); EXPECT_EQ(0U, 
VM.size()); } @@ -90,7 +90,7 @@ TYPED_TEST(ValueMapTest, OperationsWork) { EXPECT_EQ(this->AddV.get(), InsertResult1.first->first); EXPECT_EQ(3, InsertResult1.first->second); EXPECT_TRUE(InsertResult1.second); - EXPECT_EQ(true, VM.count(this->AddV.get())); + EXPECT_EQ(1u, VM.count(this->AddV.get())); std::pair::iterator, bool> InsertResult2 = VM.insert(std::make_pair(this->AddV.get(), 5)); EXPECT_EQ(this->AddV.get(), InsertResult2.first->first); @@ -169,7 +169,7 @@ TYPED_TEST(ValueMapTest, DefaultCollisionBehavior) { VM[this->BitcastV.get()] = 7; VM[this->AddV.get()] = 9; this->BitcastV->replaceAllUsesWith(this->AddV.get()); - EXPECT_EQ(0, VM.count(this->BitcastV.get())); + EXPECT_EQ(0u, VM.count(this->BitcastV.get())); EXPECT_EQ(9, VM.lookup(this->AddV.get())); } @@ -218,7 +218,7 @@ TYPED_TEST(ValueMapTest, NoFollowRAUW) { ValueMap > VM; VM[this->BitcastV.get()] = 7; EXPECT_EQ(7, VM.lookup(this->BitcastV.get())); - EXPECT_EQ(0, VM.count(this->AddV.get())); + EXPECT_EQ(0u, VM.count(this->AddV.get())); this->BitcastV->replaceAllUsesWith(this->AddV.get()); EXPECT_EQ(7, VM.lookup(this->BitcastV.get())); EXPECT_EQ(0, VM.lookup(this->AddV.get())); @@ -284,11 +284,11 @@ TYPED_TEST(ValueMapTest, SurvivesModificationByConfig) { // Now the ModifyingConfig can modify the Map inside a callback. VM[this->BitcastV.get()] = 7; this->BitcastV->replaceAllUsesWith(this->AddV.get()); - EXPECT_FALSE(VM.count(this->BitcastV.get())); - EXPECT_FALSE(VM.count(this->AddV.get())); + EXPECT_EQ(0u, VM.count(this->BitcastV.get())); + EXPECT_EQ(0u, VM.count(this->AddV.get())); VM[this->AddV.get()] = 7; this->AddV.reset(); - EXPECT_FALSE(VM.count(this->AddV.get())); + EXPECT_EQ(0u, VM.count(this->AddV.get())); } } From 5b8e73ef81bdbc3ce4d020f66d875e22827b7626 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 20 Jun 2014 20:35:47 +0000 Subject: [PATCH 0101/1155] Generate native unwind info on Win64 This patch enables LLVM to emit Win64-native unwind info rather than DWARF CFI. 
It handles all corner cases (I hope), including stack realignment. Because the unwind info is not flexible enough to describe stack frames with a gap of unknown size in the middle, such as the one caused by stack realignment, I modified register spilling code to place all spills into the fixed frame slots, so that they can be accessed relative to the frame pointer. Patch by Vadim Chugunov! Reviewed By: rnk Differential Revision: http://reviews.llvm.org/D4081 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211399 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/MachineFrameInfo.h | 3 + include/llvm/MC/MCAsmInfo.h | 1 + include/llvm/Target/TargetFrameLowering.h | 13 + lib/CodeGen/AsmPrinter/Win64Exception.cpp | 13 +- lib/CodeGen/MachineFunction.cpp | 45 ++- lib/CodeGen/PrologEpilogInserter.cpp | 87 ++--- lib/MC/MCObjectFileInfo.cpp | 15 +- lib/MC/MCStreamer.cpp | 2 + lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 12 +- lib/Target/X86/X86FrameLowering.cpp | 355 +++++++++++++------ lib/Target/X86/X86FrameLowering.h | 9 +- lib/Target/X86/X86ISelLowering.cpp | 5 +- lib/Target/X86/X86InstrCompiler.td | 24 +- lib/Target/X86/X86MCInstLower.cpp | 37 ++ test/CodeGen/X86/2007-05-05-Personality.ll | 6 +- test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll | 4 +- test/CodeGen/X86/avx-intel-ocl.ll | 62 ++-- test/CodeGen/X86/gcc_except_table.ll | 10 +- test/CodeGen/X86/win64_eh.ll | 170 +++++++++ 19 files changed, 645 insertions(+), 228 deletions(-) create mode 100644 test/CodeGen/X86/win64_eh.ll diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index bd0ea119114c..c51f8fe03bbf 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -484,6 +484,9 @@ class MachineFrameInfo { /// int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable); + /// CreateFixedSpillStackObject - Create a spill slot at a fixed location + /// on the stack. 
Returns an index with a negative value. + int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset); /// isFixedObjectIndex - Returns true if the specified index corresponds to a /// fixed stack object. diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 55dc40afe5ab..17465543001b 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -481,6 +481,7 @@ class MCAsmInfo { bool isExceptionHandlingDwarf() const { return (ExceptionsType == ExceptionHandling::DwarfCFI || ExceptionsType == ExceptionHandling::ARM || + // Win64 handler data still uses DWARF LSDA encoding. ExceptionsType == ExceptionHandling::Win64); } bool doesDwarfUseRelocationsAcrossSections() const { diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h index 7c42e23a11d1..bfddd0601794 100644 --- a/include/llvm/Target/TargetFrameLowering.h +++ b/include/llvm/Target/TargetFrameLowering.h @@ -93,6 +93,19 @@ class TargetFrameLowering { /// stack pointer. virtual bool isFPCloseToIncomingSP() const { return true; } + /// assignCalleeSavedSpillSlots - Allows target to override spill slot + /// assignment logic. If implemented, assignCalleeSavedSpillSlots() should + /// assign frame slots to all CSI entries and return true. If this method + /// returns false, spill slots will be assigned using generic implementation. + /// assignCalleeSavedSpillSlots() may add, delete or rearrange elements of + /// CSI. + virtual bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI) const { + return false; + } + /// getCalleeSavedSpillSlots - This method returns a pointer to an array of /// pairs, that contains an entry for each callee saved register that must be /// spilled to a particular stack location if it is spilled. 
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 4768a43e9a6f..89bf89fdd75d 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -77,9 +77,9 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { if (!shouldEmitPersonality) return; - MCSymbol *GCCHandlerSym = - Asm->GetExternalSymbolSymbol("_GCC_specific_handler"); - Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true); + const MCSymbol *PersHandlerSym = + TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); + Asm->OutStreamer.EmitWin64EHHandler(PersHandlerSym, true, true); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); @@ -98,15 +98,8 @@ void Win64Exception::endFunction(const MachineFunction *) { MMI->TidyLandingPads(); if (shouldEmitPersonality) { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - const MCSymbol *Sym = - TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer.PushSection(); Asm->OutStreamer.EmitWin64EHHandlerData(); - Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext), - 4); emitExceptionTable(); Asm->OutStreamer.PopSection(); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index eb3d71fef5c7..6138aef4adc1 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -457,7 +457,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a /// normal 'L' label is returned. 
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); @@ -530,10 +530,9 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); + Alignment = clampStackAlignment( + !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, + getFrameLowering()->getStackAlignment()); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -548,10 +547,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); + Alignment = clampStackAlignment( + !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, + getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -571,16 +569,30 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // object is 16-byte aligned. 
unsigned StackAlign = getFrameLowering()->getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); - Align = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Align, getFrameLowering()->getStackAlignment()); + Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*Alloca*/ nullptr)); return -++NumFixedObjects; } +/// CreateFixedSpillStackObject - Create a spill slot at a fixed location +/// on the stack. Returns an index with a negative value. +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, + int64_t SPOffset) { + unsigned StackAlign = getFrameLowering()->getStackAlignment(); + unsigned Align = MinAlign(SPOffset, StackAlign); + Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, + /*Immutable*/ true, + /*isSS*/ true, + /*Alloca*/ nullptr)); + return -++NumFixedObjects; +} BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { @@ -849,11 +861,10 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, if (isa(A->getType()) || isa(A->getType()) || isa(B->getType()) || isa(B->getType())) return false; - + // For now, only support constants with the same size. uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); - if (StoreSize != TD->getTypeStoreSize(B->getType()) || - StoreSize > 128) + if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128) return false; Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); @@ -882,7 +893,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, /// an existing one. 
User must specify the log2 of the minimum required /// alignment for the object. /// -unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 1ba2c7418f21..69ff6b3d929c 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -268,51 +268,56 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { } } - if (CSI.empty()) - return; // Early exit if no callee saved registers are modified! - - unsigned NumFixedSpillSlots; - const TargetFrameLowering::SpillSlot *FixedSpillSlots = - TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); - - // Now that we know which registers need to be saved and restored, allocate - // stack slots for them. - for (std::vector::iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); - - int FrameIdx; - if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { - I->setFrameIdx(FrameIdx); - continue; - } + if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { + // If target doesn't implement this, use generic code. + + if (CSI.empty()) + return; // Early exit if no callee saved registers are modified! + + unsigned NumFixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSpillSlots = + TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); + + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. 
+ for (std::vector::iterator I = CSI.begin(), E = CSI.end(); + I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } - // Check to see if this physreg must be spilled to a particular stack slot - // on this target. - const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; - while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && - FixedSlot->Reg != Reg) - ++FixedSlot; - - if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { - // Nope, just spill it anywhere convenient. - unsigned Align = RC->getAlignment(); - unsigned StackAlign = TFI->getStackAlignment(); - - // We may not be able to satisfy the desired alignment specification of - // the TargetRegisterClass if the stack alignment is smaller. Use the - // min. - Align = std::min(Align, StackAlign); - FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + // Check to see if this physreg must be spilled to a particular stack slot + // on this target. + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; + while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots && + FixedSlot->Reg != Reg) + ++FixedSlot; + + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { + // Nope, just spill it anywhere convenient. + unsigned Align = RC->getAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Align = std::min(Align, StackAlign); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; - } else { - // Spill it to the stack where we must. 
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); - } + } else { + // Spill it to the stack where we must. + FrameIdx = + MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + } - I->setFrameIdx(FrameIdx); + I->setFrameIdx(FrameIdx); + } } MFI->setCalleeSavedInfo(CSI); diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index d12f60c09920..4df9221d6c4a 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -632,11 +632,16 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { // though it contains relocatable pointers. In PIC mode, this is probably a // big runtime hit for C++ apps. Either the contents of the LSDA need to be // adjusted or this should be a data section. - LSDASection = - Ctx->getCOFFSection(".gcc_except_table", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + assert(T.isOSWindows() && "Windows is the only supported COFF target"); + if (T.getArch() == Triple::x86_64) { + // On Windows 64 with SEH, the LSDA is emitted into the .xdata section + LSDASection = 0; + } else { + LSDASection = Ctx->getCOFFSection(".gcc_except_table", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + } // Debug info. 
COFFDebugSymbolsSection = diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 7dccf0d751d6..2a6097a7b435 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -526,6 +526,8 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) { report_fatal_error("Frame register and offset already specified!"); if (Offset & 0x0F) report_fatal_error("Misaligned frame pointer offset!"); + if (Offset > 240) + report_fatal_error("Frame offset must be less than or equal to 240!"); MCSymbol *Label = getContext().CreateTempSymbol(); MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, Label, Register, Offset); EmitLabel(Label); diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 39480eaaac16..5a3f28c3f706 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -142,8 +142,11 @@ getNonexecutableStackSection(MCContext &Ctx) const { void X86MCAsmInfoMicrosoft::anchor() { } X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { - if (Triple.getArch() == Triple::x86_64) + if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; + PointerSize = 8; + ExceptionsType = ExceptionHandling::Win64; + } AssemblerDialect = AsmWriterFlavor; @@ -157,17 +160,18 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { void X86MCAsmInfoGNUCOFF::anchor() { } X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { + assert(Triple.isOSWindows() && "Windows is the only supported COFF target"); if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; PointerSize = 8; + ExceptionsType = ExceptionHandling::Win64; + } else { + ExceptionsType = ExceptionHandling::DwarfCFI; } AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; - // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfCFI; - UseIntegratedAssembler = true; } diff --git a/lib/Target/X86/X86FrameLowering.cpp 
b/lib/Target/X86/X86FrameLowering.cpp index fab0560e3bcd..e6433139cb25 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -305,9 +306,10 @@ static bool isEAXLiveIn(MachineFunction &MF) { return false; } -void X86FrameLowering::emitCalleeSavedFrameMoves( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned FramePtr) const { +void +X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); @@ -318,53 +320,11 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( const std::vector &CSI = MFI->getCalleeSavedInfo(); if (CSI.empty()) return; - const X86RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - bool HasFP = hasFP(MF); - - // Calculate amount of bytes used for return address storing. - int stackGrowth = -RegInfo->getSlotSize(); - - // FIXME: This is dirty hack. The code itself is pretty mess right now. - // It should be rewritten from scratch and generalized sometimes. - - // Determine maximum offset (minimum due to stack growth). - int64_t MaxOffset = 0; - for (std::vector::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) - MaxOffset = std::min(MaxOffset, - MFI->getObjectOffset(I->getFrameIdx())); - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); - Offset = MaxOffset - Offset + saveAreaOffset; - - // Don't output a new machine move if we're re-saving the frame - // pointer. 
This happens when the PrologEpilogInserter has inserted an extra - // "PUSH" of the frame pointer -- the "emitPrologue" method automatically - // generates one when frame pointers are used. If we generate a "machine - // move" for this extra "PUSH", the linker will lose track of the fact that - // the frame pointer should have the value of the first "PUSH" when it's - // trying to unwind. - // - // FIXME: This looks inelegant. It's possibly correct, but it's covering up - // another bug. I.e., one where we generate a prolog like this: - // - // pushl %ebp - // movl %esp, %ebp - // pushl %ebp - // pushl %esi - // ... - // - // The immediate re-push of EBP is unnecessary. At the least, it's an - // optimization bug. EBP can be used as a scratch register in certain - // cases, but probably not when we have a frame pointer. - if (HasFP && FramePtr == Reg) - continue; unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned CFIIndex = @@ -396,6 +356,84 @@ static bool usesTheStack(const MachineFunction &MF) { /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to /// generate the exception handling frames. + +/* + Here's a gist of what gets emitted: + + ; Establish frame pointer, if needed + [if needs FP] + push %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + .seh_pushreg %rbp + mov %rsp, %rbp + .cfi_def_cfa_register %rbp + + ; Spill general-purpose registers + [for all callee-saved GPRs] + pushq % + [if not needs FP] + .cfi_def_cfa_offset (offset from RETADDR) + .seh_pushreg % + + ; If the required stack alignment > default stack alignment + ; rsp needs to be re-aligned. This creates a "re-alignment gap" + ; of unknown size in the stack frame. + [if stack needs re-alignment] + and $MASK, %rsp + + ; Allocate space for locals + [if target is Windows and allocated space > 4096 bytes] + ; Windows needs special care for allocations larger + ; than one page.
+ mov $NNN, %rax + call ___chkstk_ms/___chkstk + sub %rax, %rsp + [else] + sub $NNN, %rsp + + [if needs FP] + .seh_stackalloc (size of XMM spill slots) + .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots + [else] + .seh_stackalloc NNN + + ; Spill XMMs + ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, + ; they may get spilled on any platform, if the current function + ; calls @llvm.eh.unwind.init + [if needs FP] + [for all callee-saved XMM registers] + movaps %, -MMM(%rbp) + [for all callee-saved XMM registers] + .seh_savexmm %, (-MMM + SEHFrameOffset) + ; i.e. the offset relative to (%rbp - SEHFrameOffset) + [else] + [for all callee-saved XMM registers] + movaps %, KKK(%rsp) + [for all callee-saved XMM registers] + .seh_savexmm %, KKK + + .seh_endprologue + + [if needs base pointer] + mov %rsp, %rbx + + ; Emit CFI info + [if needs FP] + [for all callee-saved registers] + .cfi_offset %, (offset from %rbp) + [else] + .cfi_def_cfa_offset (offset from RETADDR) + [for all callee-saved registers] + .cfi_offset %, (offset from %rsp) + + Notes: + - .seh directives are emitted only for Windows 64 ABI + - .cfi directives are emitted for all other ABIs + - for 32-bit code, substitute %e?? registers for %r?? +*/ + void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -406,8 +444,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo(); - bool needsFrameMoves = MMI.hasDebugInfo() || - Fn->needsUnwindTableEntry(); uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. 
bool HasFP = hasFP(MF); @@ -415,6 +451,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { bool Is64Bit = STI.is64Bit(); bool IsLP64 = STI.isTarget64BitLP64(); bool IsWin64 = STI.isTargetWin64(); + bool IsSEH = + MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() == + ExceptionHandling::Win64; // Not necessarily synonymous with IsWin64. + bool NeedsWin64SEH = IsSEH && Fn->needsUnwindTableEntry(); + bool NeedsDwarfCFI = + !IsSEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); @@ -512,7 +554,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(FramePtr, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); - if (needsFrameMoves) { + if (NeedsDwarfCFI) { // Mark the place where EBP/RBP was saved. // Define the current CFA rule to use the provided offset. assert(StackSize); @@ -530,13 +572,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); } + if (NeedsWin64SEH) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) + .addImm(FramePtr) + .setMIFlag(MachineInstr::FrameSetup); + } + // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); - if (needsFrameMoves) { + if (NeedsDwarfCFI) { // Mark effective beginning of when frame pointer becomes valid. // Define the current CFA to use the EBP/RBP register. unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); @@ -546,9 +594,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); } - // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); - I != E; ++I) + // Mark the FramePtr as live-in in every block. 
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) I->addLiveIn(FramePtr); } else { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); @@ -562,10 +609,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; - MBBI->setFlag(MachineInstr::FrameSetup); + unsigned Reg = MBBI->getOperand(0).getReg(); ++MBBI; - if (!HasFP && needsFrameMoves) { + if (!HasFP && NeedsDwarfCFI) { // Mark callee-saved push instruction. // Define the current CFA rule to use the provided offset. assert(StackSize); @@ -575,16 +622,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); StackOffset += stackGrowth; } + + if (NeedsWin64SEH) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag( + MachineInstr::FrameSetup); + } } // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). - - // NOTE: We push the registers before realigning the stack, so - // vector callee-saved (xmm) registers may be saved w/o proper - // alignment in this way. However, currently these regs are saved in - // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so - // this shouldn't be a problem. 
if (RegInfo->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); MachineInstr *MI = @@ -683,23 +729,85 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } - } else if (NumBytes) + } else if (NumBytes) { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); + } + + int SEHFrameOffset = 0; + if (NeedsWin64SEH) { + if (HasFP) { + // We need to set frame base offset low enough such that all saved + // register offsets would be positive relative to it, but we can't + // just use NumBytes, because .seh_setframe offset must be <=240. + // So we pretend to have only allocated enough space to spill the + // non-volatile registers. + // We don't care about the rest of stack allocation, because unwinder + // will restore SP to (BP - SEHFrameOffset) + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + int offset = MFI->getObjectOffset(Info.getFrameIdx()); + SEHFrameOffset = std::max(SEHFrameOffset, abs(offset)); + } + SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment + + // This only needs to account for XMM spill slots, GPR slots + // are covered by .seh_pushreg's emitted above.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(SEHFrameOffset - X86FI->getCalleeSavedFrameSize()) + .setMIFlag(MachineInstr::FrameSetup); + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) + .addImm(FramePtr) + .addImm(SEHFrameOffset) + .setMIFlag(MachineInstr::FrameSetup); + } else { + // SP will be the base register for restoring XMMs + if (NumBytes) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } + } + } + + // Skip the rest of register spilling code + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + // Emit SEH info for non-GPRs + if (NeedsWin64SEH) { + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + unsigned Reg = Info.getReg(); + if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) + continue; + assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class"); + + int Offset = getFrameIndexOffset(MF, Info.getFrameIdx()); + Offset += SEHFrameOffset; + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) + .addImm(Reg) + .addImm(Offset) + .setMIFlag(MachineInstr::FrameSetup); + } + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) + .setMIFlag(MachineInstr::FrameSetup); + } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) { - // Update the frame pointer with the current stack pointer. + // Update the base pointer with the current stack pointer. unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); } - if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { + if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { // Mark end of stack pointer adjustment. 
if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. @@ -714,7 +822,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Emit DWARF info specifying the offsets of the callee-saved registers. if (PushedRegs) - emitCalleeSavedFrameMoves(MBB, MBBI, DL, HasFP ? FramePtr : StackPtr); + emitCalleeSavedFrameMoves(MBB, MBBI, DL); } } @@ -974,48 +1082,98 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return getFrameIndexOffset(MF, FI); } -bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; +bool X86FrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector &CSI) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86RegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + unsigned SlotSize = RegInfo->getSlotSize(); + X86MachineFunctionInfo *X86FI = MF.getInfo(); + + unsigned CalleeSavedFrameSize = 0; + int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); + + if (hasFP(MF)) { + // emitPrologue always spills frame register the first thing. + SpillSlotOffset -= SlotSize; + MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); + + // Since emitPrologue and emitEpilogue will handle spilling and restoring of + // the frame register, we can delete it from CSI list and not have to worry + // about avoiding it later. + unsigned FPReg = RegInfo->getFrameRegister(MF); + for (unsigned i = 0; i < CSI.size(); ++i) { + if (CSI[i].getReg() == FPReg) { + CSI.erase(CSI.begin() + i); + break; + } + } + } + + // Assign slots for GPRs. It increases frame size. 
+ for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i - 1].getReg(); + + if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) + continue; + + SpillSlotOffset -= SlotSize; + CalleeSavedFrameSize += SlotSize; + + int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); + CSI[i - 1].setFrameIdx(SlotIndex); + } + + X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); + + // Assign slots for XMMs. + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i - 1].getReg(); + if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) + continue; + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + // ensure alignment + SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment(); + // spill into slot + SpillSlotOffset -= RC->getSize(); + int SlotIndex = + MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset); + CSI[i - 1].setFrameIdx(SlotIndex); + MFI->ensureMaxAlignment(RC->getAlignment()); + } + + return true; +} + +bool X86FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(MI); MachineFunction &MF = *MBB.getParent(); - const X86RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - unsigned SlotSize = RegInfo->getSlotSize(); - unsigned FPReg = TRI->getFrameRegister(MF); - unsigned CalleeFrameSize = 0; - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - X86MachineFunctionInfo *X86FI = MF.getInfo(); const X86Subtarget &STI = MF.getTarget().getSubtarget(); // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); + if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; // Add the callee-saved register as live-in. It's killed at the spill. 
MBB.addLiveIn(Reg); - if (Reg == FPReg) - // X86RegisterInfo::emitPrologue will handle spilling of frame register. - continue; - CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); } - X86FI->setCalleeSavedFrameSize(CalleeFrameSize); - // Make XMM regs spilled. X86 does not have ability of push/pop XMM. // It can be done by spilling XMMs to stack frame. - // Note that only Win64 ABI might spill XMMs. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (X86::GR64RegClass.contains(Reg) || @@ -1024,8 +1182,12 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RC, TRI); + --MI; + MI->setFlag(MachineInstr::FrameSetup); + ++MI; } return true; @@ -1050,22 +1212,19 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) continue; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, TRI); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); } // POP GPRs. - unsigned FPReg = TRI->getFrameRegister(MF); unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; - if (Reg == FPReg) - // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 
- continue; + BuildMI(MBB, MI, DL, TII.get(Opc), Reg); } return true; @@ -1096,22 +1255,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, TailCallReturnAddrDelta - SlotSize, true); } - if (hasFP(MF)) { - assert((TailCallReturnAddrDelta <= 0) && - "The Delta should always be zero or negative"); - const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); - - // Create a frame entry for the EBP register that must be saved. - int FrameIdx = MFI->CreateFixedObject(SlotSize, - -(int)SlotSize + - TFI.getOffsetOfLocalArea() + - TailCallReturnAddrDelta, - true); - assert(FrameIdx == MFI->getObjectIndexBegin() && - "Slot for EBP register must be last in order to be found!"); - (void)FrameIdx; - } - // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 5c43c1488d4a..5ad3d4dc2e97 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -27,8 +27,8 @@ class X86FrameLowering : public TargetFrameLowering { : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned FramePtr) const; + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
@@ -42,6 +42,11 @@ class X86FrameLowering : public TargetFrameLowering { void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = nullptr) const override; + bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 09018a5e1187..5cabd14064ed 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -605,9 +605,8 @@ void X86TargetLowering::resetOperationActions() { } // FIXME - use subtarget debug flags - if (!Subtarget->isTargetDarwin() && - !Subtarget->isTargetELF() && - !Subtarget->isTargetCygMing()) { + if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() && + !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) { setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 9b3dce52f72e..77a876df02ac 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -110,7 +110,7 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in // When using segmented stacks these are lowered into instructions which first // check if the current stacklet has enough free memory. If it does, memory is -// allocated by bumping the stack pointer. Otherwise memory is allocated from +// allocated by bumping the stack pointer. Otherwise memory is allocated from // the heap. let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in @@ -196,6 +196,26 @@ let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { "#EH_SjLj_Setup\t$dst", []>; } +//===----------------------------------------------------------------------===// +// Pseudo instructions used by unwind info. 
+// +let isPseudo = 1 in { + def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg), + "#SEH_PushReg $reg", []>; + def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), + "#SEH_SaveReg $reg, $dst", []>; + def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), + "#SEH_SaveXMM $reg, $dst", []>; + def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size), + "#SEH_StackAlloc $size", []>; + def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset), + "#SEH_SetFrame $reg, $offset", []>; + def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode), + "#SEH_PushFrame $mode", []>; + def SEH_EndPrologue : I<0, Pseudo, (outs), (ins), + "#SEH_EndPrologue", []>; +} + //===----------------------------------------------------------------------===// // Pseudo instructions used by segmented stacks. // @@ -371,7 +391,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in { def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32, Requires<[In64BitMode]>; - + let Uses = [RAX,RCX,RDI] in def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)], IIC_REP_STOS>, REP, diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 0190080b935b..3314c641c9ea 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" +#include "X86RegisterInfo.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "llvm/ADT/SmallString.h" @@ -779,6 +780,9 @@ static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM, void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(*MF, *this); + const X86RegisterInfo *RI = + static_cast(TM.getRegisterInfo()); + switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: 
llvm_unreachable("Should be handled target independently"); @@ -883,6 +887,39 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(X86::R10) .addReg(X86::RAX)); return; + + case X86::SEH_PushReg: + OutStreamer.EmitWin64EHPushReg( + RI->getSEHRegNum(MI->getOperand(0).getImm())); + return; + + case X86::SEH_SaveReg: + OutStreamer.EmitWin64EHSaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + return; + + case X86::SEH_SaveXMM: + OutStreamer.EmitWin64EHSaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + return; + + case X86::SEH_StackAlloc: + OutStreamer.EmitWin64EHAllocStack(MI->getOperand(0).getImm()); + return; + + case X86::SEH_SetFrame: + OutStreamer.EmitWin64EHSetFrame( + RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + return; + + case X86::SEH_PushFrame: + OutStreamer.EmitWin64EHPushFrame(MI->getOperand(0).getImm()); + return; + + case X86::SEH_EndPrologue: + OutStreamer.EmitWin64EHEndProlog(); + return; } MCInst TmpInst; diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll index 5b8fe72b5d0f..b99c58c6e4af 100644 --- a/test/CodeGen/X86/2007-05-05-Personality.ll +++ b/test/CodeGen/X86/2007-05-05-Personality.ll @@ -1,12 +1,14 @@ ; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s --check-prefix=LIN -; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=LIN ; RUN: llc < %s -mtriple=i386-pc-mingw32 -o - | FileCheck %s --check-prefix=WIN ; RUN: llc < %s -mtriple=i686-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN +; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN64 ; LIN: .cfi_personality 0, __gnat_eh_personality ; LIN: .cfi_lsda 0, .Lexception0 ; WIN: .cfi_personality 0, ___gnat_eh_personality ; WIN: .cfi_lsda 0, Lexception0 +; WIN64: .seh_handler __gnat_eh_personality +; WIN64: .seh_handlerdata @error = external global 
i8 @@ -15,7 +17,7 @@ entry: invoke void @raise() to label %eh_then unwind label %unwind -unwind: ; preds = %entry +unwind: ; preds = %entry %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*) catch i8* @error %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1 diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll index 1259cf47b2bc..dfb98bb1ab39 100644 --- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll +++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll @@ -1,7 +1,7 @@ ; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s ; CHECK: subq $40, %rsp -; CHECK: movaps %xmm8, (%rsp) -; CHECK: movaps %xmm7, 16(%rsp) +; CHECK: movaps %xmm8, 16(%rsp) +; CHECK: movaps %xmm7, (%rsp) define i32 @a() nounwind { entry: diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll index 7337815a39ac..3e051bff768d 100644 --- a/test/CodeGen/X86/avx-intel-ocl.ll +++ b/test/CodeGen/X86/avx-intel-ocl.ll @@ -7,21 +7,21 @@ declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *) declare <16 x float> @func_float16(<16 x float>, <16 x float>) declare i32 @func_int(i32, i32) -; WIN64: testf16_inp +; WIN64-LABEL: testf16_inp ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}} ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}} ; WIN64: leaq {{.*}}(%rsp), %rcx ; WIN64: call ; WIN64: ret -; X32: testf16_inp +; X32-LABEL: testf16_inp ; X32: movl %eax, (%esp) ; X32: vaddps {{.*}}, {{%ymm[0-1]}} ; X32: vaddps {{.*}}, {{%ymm[0-1]}} ; X32: call ; X32: ret -; X64: testf16_inp +; X64-LABEL: testf16_inp ; X64: vaddps {{.*}}, {{%ymm[0-1]}} ; X64: vaddps {{.*}}, {{%ymm[0-1]}} ; X64: leaq {{.*}}(%rsp), %rdi @@ -41,14 +41,14 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { ;test calling conventions - preserved registers ; preserved ymm6-ymm15 -; WIN64: testf16_regs +; WIN64-LABEL: testf16_regs ; WIN64: call ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, 
{{%ymm[0-1]}} ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; WIN64: ret ; preserved ymm8-ymm15 -; X64: testf16_regs +; X64-LABEL: testf16_regs ; X64: call ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} @@ -65,28 +65,30 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { } ; test calling conventions - prolog and epilog -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64-LABEL: test_prolog_epilog +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill ; WIN64: call -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} 
# 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload - +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload + +; X64-LABEL: test_prolog_epilog ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill @@ -111,12 +113,14 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl ; test functions with integer parameters ; pass parameters on stack for 32-bit platform +; X32-LABEL: test_int ; X32: movl {{.*}}, 4(%esp) ; X32: movl {{.*}}, (%esp) ; X32: call ; X32: addl {{.*}}, %eax ; 
pass parameters in registers for 64-bit platform +; X64-LABEL: test_int ; X64: leal {{.*}}, %edi ; X64: movl {{.*}}, %esi ; X64: call @@ -128,21 +132,21 @@ define i32 @test_int(i32 %a, i32 %b) nounwind { ret i32 %c } -; WIN64: test_float4 +; WIN64-LABEL: test_float4 ; WIN64-NOT: vzeroupper ; WIN64: call ; WIN64-NOT: vzeroupper ; WIN64: call ; WIN64: ret -; X64: test_float4 +; X64-LABEL: test_float4 ; X64-NOT: vzeroupper ; X64: call ; X64-NOT: vzeroupper ; X64: call ; X64: ret -; X32: test_float4 +; X32-LABEL: test_float4 ; X32: vzeroupper ; X32: call ; X32: vzeroupper diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll index 8c328ec58f93..a732eb1efbd7 100644 --- a/test/CodeGen/X86/gcc_except_table.ll +++ b/test/CodeGen/X86/gcc_except_table.ll @@ -13,14 +13,14 @@ define i32 @main() uwtable optsize ssp { ; APPLE: GCC_except_table0: ; APPLE: Lexception0: -; MINGW64: .cfi_startproc -; MINGW64: .cfi_personality 0, __gxx_personality_v0 -; MINGW64: .cfi_lsda 0, .Lexception0 -; MINGW64: .cfi_def_cfa_offset 16 +; MINGW64: .seh_proc +; MINGW64: .seh_handler __gxx_personality_v0 +; MINGW64: .seh_setframe 5, 0 ; MINGW64: callq _Unwind_Resume -; MINGW64: .cfi_endproc +; MINGW64: .seh_handlerdata ; MINGW64: GCC_except_table0: ; MINGW64: Lexception0: +; MINGW64: .seh_endproc ; MINGW32: .cfi_startproc ; MINGW32: .cfi_personality 0, ___gxx_personality_v0 diff --git a/test/CodeGen/X86/win64_eh.ll b/test/CodeGen/X86/win64_eh.ll new file mode 100644 index 000000000000..f1f874eb2f5a --- /dev/null +++ b/test/CodeGen/X86/win64_eh.ll @@ -0,0 +1,170 @@ +; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64 +; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64 + +; Check function without prolog +define void @foo0() uwtable { +entry: + ret void +} +; WIN64-LABEL: foo0: +; WIN64: .seh_proc foo0 +; WIN64: .seh_endprologue +; WIN64: ret +; WIN64: .seh_endproc + +; Checks a small 
stack allocation +define void @foo1() uwtable { +entry: + %baz = alloca [2000 x i16], align 2 + ret void +} +; WIN64-LABEL: foo1: +; WIN64: .seh_proc foo1 +; WIN64: subq $4000, %rsp +; WIN64: .seh_stackalloc 4000 +; WIN64: .seh_endprologue +; WIN64: addq $4000, %rsp +; WIN64: ret +; WIN64: .seh_endproc + +; Checks a stack allocation requiring call to __chkstk/___chkstk_ms +define void @foo2() uwtable { +entry: + %baz = alloca [4000 x i16], align 2 + ret void +} +; WIN64-LABEL: foo2: +; WIN64: .seh_proc foo2 +; WIN64: movabsq $8000, %rax +; WIN64: callq {{__chkstk|___chkstk_ms}} +; WIN64: subq %rax, %rsp +; WIN64: .seh_stackalloc 8000 +; WIN64: .seh_endprologue +; WIN64: addq $8000, %rsp +; WIN64: ret +; WIN64: .seh_endproc + + +; Checks stack push +define i32 @foo3(i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable { +entry: + %a = alloca i32 + %b = alloca i32 + %c = alloca i32 + %d = alloca i32 + %e = alloca i32 + %f = alloca i32 + store i32 %a_arg, i32* %a + store i32 %b_arg, i32* %b + store i32 %c_arg, i32* %c + store i32 %d_arg, i32* %d + store i32 %e_arg, i32* %e + store i32 %f_arg, i32* %f + %tmp = load i32* %a + %tmp1 = mul i32 %tmp, 2 + %tmp2 = load i32* %b + %tmp3 = mul i32 %tmp2, 3 + %tmp4 = add i32 %tmp1, %tmp3 + %tmp5 = load i32* %c + %tmp6 = mul i32 %tmp5, 5 + %tmp7 = add i32 %tmp4, %tmp6 + %tmp8 = load i32* %d + %tmp9 = mul i32 %tmp8, 7 + %tmp10 = add i32 %tmp7, %tmp9 + %tmp11 = load i32* %e + %tmp12 = mul i32 %tmp11, 11 + %tmp13 = add i32 %tmp10, %tmp12 + %tmp14 = load i32* %f + %tmp15 = mul i32 %tmp14, 13 + %tmp16 = add i32 %tmp13, %tmp15 + ret i32 %tmp16 +} +; WIN64-LABEL: foo3: +; WIN64: .seh_proc foo3 +; WIN64: pushq %rsi +; WIN64: .seh_pushreg 6 +; WIN64: subq $24, %rsp +; WIN64: .seh_stackalloc 24 +; WIN64: .seh_endprologue +; WIN64: addq $24, %rsp +; WIN64: popq %rsi +; WIN64: ret +; WIN64: .seh_endproc + + +; Check emission of eh handler and handler data +declare i32 @_d_eh_personality(i32, i32, i64, i8*, i8*) 
+declare void @_d_eh_resume_unwind(i8*) + +declare i32 @bar() + +define i32 @foo4() #0 { +entry: + %step = alloca i32, align 4 + store i32 0, i32* %step + %tmp = load i32* %step + + %tmp1 = invoke i32 @bar() + to label %finally unwind label %landingpad + +finally: + store i32 1, i32* %step + br label %endtryfinally + +landingpad: + %landing_pad = landingpad { i8*, i32 } personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality + cleanup + %tmp3 = extractvalue { i8*, i32 } %landing_pad, 0 + store i32 2, i32* %step + call void @_d_eh_resume_unwind(i8* %tmp3) + unreachable + +endtryfinally: + %tmp10 = load i32* %step + ret i32 %tmp10 +} +; WIN64-LABEL: foo4: +; WIN64: .seh_proc foo4 +; WIN64: .seh_handler _d_eh_personality, @unwind, @except +; WIN64: subq $56, %rsp +; WIN64: .seh_stackalloc 56 +; WIN64: .seh_endprologue +; WIN64: addq $56, %rsp +; WIN64: ret +; WIN64: .seh_handlerdata +; WIN64: .seh_endproc + + +; Check stack re-alignment and xmm spilling +define void @foo5() uwtable { +entry: + %s = alloca i32, align 64 + call void asm sideeffect "", "~{rbx},~{rdi},~{xmm6},~{xmm7}"() + ret void +} +; WIN64-LABEL: foo5: +; WIN64: .seh_proc foo5 +; WIN64: pushq %rbp +; WIN64: .seh_pushreg 5 +; WIN64: movq %rsp, %rbp +; WIN64: pushq %rdi +; WIN64: .seh_pushreg 7 +; WIN64: pushq %rbx +; WIN64: .seh_pushreg 3 +; WIN64: andq $-64, %rsp +; WIN64: subq $128, %rsp +; WIN64: .seh_stackalloc 48 +; WIN64: .seh_setframe 5, 64 +; WIN64: movaps %xmm7, -32(%rbp) # 16-byte Spill +; WIN64: movaps %xmm6, -48(%rbp) # 16-byte Spill +; WIN64: .seh_savexmm 6, 16 +; WIN64: .seh_savexmm 7, 32 +; WIN64: .seh_endprologue +; WIN64: movaps -48(%rbp), %xmm6 # 16-byte Reload +; WIN64: movaps -32(%rbp), %xmm7 # 16-byte Reload +; WIN64: leaq -16(%rbp), %rsp +; WIN64: popq %rbx +; WIN64: popq %rdi +; WIN64: popq %rbp +; WIN64: retq +; WIN64: .seh_endproc From 91e18f7639c61ca40efe39efbfcba825570a824e Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Fri, 20 Jun 2014 21:07:14 +0000 Subject: 
[PATCH 0102/1155] Revert "Replace Execution Engine's mutex with std::recursive_mutex." This reverts commit 1f502bd9d7d2c1f98ad93a09ffe435e11a95aedd, due to GCC / MinGW's lack of support for C++11 threading. It's possible this will go back in after we come up with a reasonable solution. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211401 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/ExecutionEngine/ExecutionEngine.h | 7 ++-- include/llvm/IR/ValueMap.h | 12 +++--- lib/ExecutionEngine/ExecutionEngine.cpp | 16 ++++---- lib/ExecutionEngine/JIT/JIT.cpp | 38 +++++++++---------- lib/ExecutionEngine/JIT/JITEmitter.cpp | 22 +++++------ lib/ExecutionEngine/MCJIT/MCJIT.cpp | 37 +++++++++--------- unittests/IR/ValueMapTest.cpp | 8 ++-- 7 files changed, 71 insertions(+), 69 deletions(-) diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index d349af810650..e5dab6191ab6 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -22,10 +22,10 @@ #include "llvm/IR/ValueMap.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Mutex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include -#include #include #include @@ -42,6 +42,7 @@ class JITEventListener; class JITMemoryManager; class MachineCodeInfo; class Module; +class MutexGuard; class ObjectCache; class RTDyldMemoryManager; class Triple; @@ -58,7 +59,7 @@ class ExecutionEngineState { public: struct AddressMapConfig : public ValueMapConfig { typedef ExecutionEngineState *ExtraData; - static std::recursive_mutex *getMutex(ExecutionEngineState *EES); + static sys::Mutex *getMutex(ExecutionEngineState *EES); static void onDelete(ExecutionEngineState *EES, const GlobalValue *Old); static void onRAUW(ExecutionEngineState *, const GlobalValue *, const GlobalValue *); @@ -163,7 +164,7 @@ class ExecutionEngine { /// lock - 
This lock protects the ExecutionEngine, MCJIT, JIT, JITResolver and /// JITEmitter classes. It must be held while changing the internal state of /// any of those classes. - std::recursive_mutex lock; + sys::Mutex lock; //===--------------------------------------------------------------------===// // ExecutionEngine Startup diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index 3d575ac874d9..43a79c7db2b9 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -28,9 +28,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Mutex.h" #include "llvm/Support/type_traits.h" #include -#include namespace llvm { @@ -45,7 +45,7 @@ class ValueMapConstIterator; /// This class defines the default behavior for configurable aspects of /// ValueMap<>. User Configs should inherit from this class to be as compatible /// as possible with future versions of ValueMap. -template +template struct ValueMapConfig { typedef MutexT mutex_type; @@ -217,11 +217,11 @@ class ValueMapCallbackVH : public CallbackVH { ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); if (M) - M->lock(); + M->acquire(); Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this. Copy.Map->Map.erase(Copy); // Definitely destroys *this. 
if (M) - M->unlock(); + M->release(); } void allUsesReplacedWith(Value *new_key) override { assert(isa(new_key) && @@ -230,7 +230,7 @@ class ValueMapCallbackVH : public CallbackVH { ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); if (M) - M->lock(); + M->acquire(); KeyT typed_new_key = cast(new_key); // Can destroy *this: @@ -246,7 +246,7 @@ class ValueMapCallbackVH : public CallbackVH { } } if (M) - M->unlock(); + M->release(); } }; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 553ceb4575d5..9154fe2f5ff4 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -166,7 +166,7 @@ void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) { } void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { - std::lock_guard locked(lock); + MutexGuard locked(lock); DEBUG(dbgs() << "JIT: Map \'" << GV->getName() << "\' to [" << Addr << "]\n";); @@ -184,14 +184,14 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { } void ExecutionEngine::clearAllGlobalMappings() { - std::lock_guard locked(lock); + MutexGuard locked(lock); EEState.getGlobalAddressMap().clear(); EEState.getGlobalAddressReverseMap().clear(); } void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { - std::lock_guard locked(lock); + MutexGuard locked(lock); for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) EEState.RemoveMapping(FI); @@ -201,7 +201,7 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { } void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { - std::lock_guard locked(lock); + MutexGuard locked(lock); ExecutionEngineState::GlobalAddressMapTy &Map = EEState.getGlobalAddressMap(); @@ -228,7 +228,7 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { } void 
*ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { - std::lock_guard locked(lock); + MutexGuard locked(lock); ExecutionEngineState::GlobalAddressMapTy::iterator I = EEState.getGlobalAddressMap().find(GV); @@ -236,7 +236,7 @@ void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { - std::lock_guard locked(lock); + MutexGuard locked(lock); // If we haven't computed the reverse mapping yet, do so first. if (EEState.getGlobalAddressReverseMap().empty()) { @@ -555,7 +555,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { if (Function *F = const_cast(dyn_cast(GV))) return getPointerToFunction(F); - std::lock_guard locked(lock); + MutexGuard locked(lock); if (void *P = EEState.getGlobalAddressMap()[GV]) return P; @@ -1346,7 +1346,7 @@ ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE) : EE(EE), GlobalAddressMap(this) { } -std::recursive_mutex * +sys::Mutex * ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) { return &EES->EE.lock; } diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 463fa299b3f3..83ec9784b98e 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -96,18 +96,18 @@ namespace { /// bugpoint or gdb users to search for a function by name without any context. class JitPool { SmallPtrSet JITs; // Optimize for process containing just 1 JIT. 
- mutable std::recursive_mutex Lock; + mutable sys::Mutex Lock; public: void Add(JIT *jit) { - std::lock_guard guard(Lock); + MutexGuard guard(Lock); JITs.insert(jit); } void Remove(JIT *jit) { - std::lock_guard guard(Lock); + MutexGuard guard(Lock); JITs.erase(jit); } void *getPointerToNamedFunction(const char *Name) const { - std::lock_guard guard(Lock); + MutexGuard guard(Lock); assert(JITs.size() != 0 && "No Jit registered"); //search function in every instance of JIT for (SmallPtrSet::const_iterator Jit = JITs.begin(), @@ -150,7 +150,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, AllJits->Add(this); // Add target data - std::lock_guard locked(lock); + MutexGuard locked(lock); FunctionPassManager &PM = jitstate->getPM(); M->setDataLayout(TM.getDataLayout()); PM.add(new DataLayoutPass(M)); @@ -177,7 +177,7 @@ JIT::~JIT() { /// addModule - Add a new Module to the JIT. If we previously removed the last /// Module, we need re-initialize jitstate with a valid Module. void JIT::addModule(Module *M) { - std::lock_guard locked(lock); + MutexGuard locked(lock); if (Modules.empty()) { assert(!jitstate && "jitstate should be NULL if Modules vector is empty!"); @@ -206,7 +206,7 @@ void JIT::addModule(Module *M) { bool JIT::removeModule(Module *M) { bool result = ExecutionEngine::removeModule(M); - std::lock_guard locked(lock); + MutexGuard locked(lock); if (jitstate && jitstate->getModule() == M) { delete jitstate; @@ -408,13 +408,13 @@ GenericValue JIT::runFunction(Function *F, void JIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - std::lock_guard locked(lock); + MutexGuard locked(lock); EventListeners.push_back(L); } void JIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - std::lock_guard locked(lock); + MutexGuard locked(lock); std::vector::reverse_iterator I= std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { @@ -426,14 +426,14 @@ void JIT::NotifyFunctionEmitted( 
const Function &F, void *Code, size_t Size, const JITEvent_EmittedFunctionDetails &Details) { - std::lock_guard locked(lock); + MutexGuard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details); } } void JIT::NotifyFreeingMachineCode(void *OldPtr) { - std::lock_guard locked(lock); + MutexGuard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFreeingMachineCode(OldPtr); } @@ -444,7 +444,7 @@ void JIT::NotifyFreeingMachineCode(void *OldPtr) { /// GlobalAddress[F] with the address of F's machine code. /// void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { - std::lock_guard locked(lock); + MutexGuard locked(lock); class MCIListener : public JITEventListener { MachineCodeInfo *const MCI; @@ -505,7 +505,7 @@ void *JIT::getPointerToFunction(Function *F) { if (void *Addr = getPointerToGlobalIfAvailable(F)) return Addr; // Check if function already code gen'd - std::lock_guard locked(lock); + MutexGuard locked(lock); // Now that this thread owns the lock, make sure we read in the function if it // exists in this Module. 
@@ -534,7 +534,7 @@ void *JIT::getPointerToFunction(Function *F) { } void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { - std::lock_guard locked(lock); + MutexGuard locked(lock); BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap().find(BB); @@ -546,7 +546,7 @@ void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { } void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { - std::lock_guard locked(lock); + MutexGuard locked(lock); getBasicBlockAddressMap().erase(BB); } @@ -555,7 +555,7 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) { (void)getPointerToFunction(BB->getParent()); // resolve basic block address - std::lock_guard locked(lock); + MutexGuard locked(lock); BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap().find(BB); @@ -592,7 +592,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { - std::lock_guard locked(lock); + MutexGuard locked(lock); void *Ptr = getPointerToGlobalIfAvailable(GV); if (Ptr) return Ptr; @@ -666,7 +666,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { size_t S = getDataLayout()->getTypeAllocSize(GlobalType); size_t A = getDataLayout()->getPreferredAlignment(GV); if (GV->isThreadLocal()) { - std::lock_guard locked(lock); + MutexGuard locked(lock); Ptr = TJI.allocateThreadLocalMemory(S); } else if (TJI.allocateSeparateGVMemory()) { if (A <= 8) { @@ -687,7 +687,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { } void JIT::addPendingFunction(Function *F) { - std::lock_guard locked(lock); + MutexGuard locked(lock); jitstate->getPendingFunctions().push_back(F); } diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 1e1f5d55802c..50b8c10b638b 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -40,6 +40,7 @@ 
#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetJITInfo.h" @@ -49,7 +50,6 @@ #ifndef NDEBUG #include #endif -#include using namespace llvm; #define DEBUG_TYPE "jit" @@ -230,22 +230,22 @@ namespace { std::map Map; /// Guards Map from concurrent accesses. - mutable std::recursive_mutex Lock; + mutable sys::Mutex Lock; public: /// Registers a Stub to be resolved by Resolver. void RegisterStubResolver(void *Stub, JITResolver *Resolver) { - std::lock_guard guard(Lock); + MutexGuard guard(Lock); Map.insert(std::make_pair(Stub, Resolver)); } /// Unregisters the Stub when it's invalidated. void UnregisterStubResolver(void *Stub) { - std::lock_guard guard(Lock); + MutexGuard guard(Lock); Map.erase(Stub); } /// Returns the JITResolver instance that owns the Stub. JITResolver *getResolverFromStub(void *Stub) const { - std::lock_guard guard(Lock); + MutexGuard guard(Lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. // This is the same trick as in LookupFunctionFromCallSite from @@ -258,7 +258,7 @@ namespace { /// True if any stubs refer to the given resolver. Only used in an assert(). /// O(N) bool ResolverHasStubs(JITResolver* Resolver) const { - std::lock_guard guard(Lock); + MutexGuard guard(Lock); for (std::map::const_iterator I = Map.begin(), E = Map.end(); I != E; ++I) { if (I->second == Resolver) @@ -494,7 +494,7 @@ JITResolver::~JITResolver() { /// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub /// if it has already been created. void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { - std::lock_guard guard(TheJIT->lock); + MutexGuard locked(TheJIT->lock); // If we already have a stub for this function, recycle it. 
return state.getFunctionToLazyStubMap().lookup(F); @@ -503,7 +503,7 @@ void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { /// getFunctionStub - This returns a pointer to a function stub, creating /// one on demand as needed. void *JITResolver::getLazyFunctionStub(Function *F) { - std::lock_guard guard(TheJIT->lock); + MutexGuard locked(TheJIT->lock); // If we already have a lazy stub for this function, recycle it. void *&Stub = state.getFunctionToLazyStubMap()[F]; @@ -564,7 +564,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { /// getGlobalValueIndirectSym - Return a lazy pointer containing the specified /// GV address. void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { - std::lock_guard guard(TheJIT->lock); + MutexGuard locked(TheJIT->lock); // If we already have a stub for this global variable, recycle it. void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV]; @@ -622,7 +622,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { // Only lock for getting the Function. The call getPointerToFunction made // in this function might trigger function materializing, which requires // JIT lock to be unlocked. - std::lock_guard guard(JR->TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. @@ -654,7 +654,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { } // Reacquire the lock to update the GOT map. - std::lock_guard locked(JR->TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // We might like to remove the call site from the CallSiteToFunction map, but // we can't do that! 
Multiple threads could be stuck, waiting to acquire the diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index f149edcb7b04..e9ba96a6496f 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; @@ -65,7 +66,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, } MCJIT::~MCJIT() { - std::lock_guard locked(lock); + MutexGuard locked(lock); // FIXME: We are managing our modules, so we do not want the base class // ExecutionEngine to manage them as well. To avoid double destruction // of the first (and only) module added in ExecutionEngine constructor @@ -101,12 +102,12 @@ MCJIT::~MCJIT() { } void MCJIT::addModule(Module *M) { - std::lock_guard locked(lock); + MutexGuard locked(lock); OwnedModules.addModule(M); } bool MCJIT::removeModule(Module *M) { - std::lock_guard locked(lock); + MutexGuard locked(lock); return OwnedModules.removeModule(M); } @@ -128,12 +129,12 @@ void MCJIT::addArchive(object::Archive *A) { void MCJIT::setObjectCache(ObjectCache* NewCache) { - std::lock_guard locked(lock); + MutexGuard locked(lock); ObjCache = NewCache; } ObjectBufferStream* MCJIT::emitObject(Module *M) { - std::lock_guard locked(lock); + MutexGuard locked(lock); // This must be a module which has already been added but not loaded to this // MCJIT instance, since these conditions are tested by our caller, @@ -173,7 +174,7 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) { void MCJIT::generateCodeForModule(Module *M) { // Get a thread lock to make sure we aren't trying to load multiple times - std::lock_guard locked(lock); + MutexGuard locked(lock); // This must be a module which has already been added to this MCJIT instance. 
assert(OwnedModules.ownsModule(M) && @@ -213,7 +214,7 @@ void MCJIT::generateCodeForModule(Module *M) { } void MCJIT::finalizeLoadedModules() { - std::lock_guard locked(lock); + MutexGuard locked(lock); // Resolve any outstanding relocations. Dyld.resolveRelocations(); @@ -229,7 +230,7 @@ void MCJIT::finalizeLoadedModules() { // FIXME: Rename this. void MCJIT::finalizeObject() { - std::lock_guard locked(lock); + MutexGuard locked(lock); for (ModulePtrSet::iterator I = OwnedModules.begin_added(), E = OwnedModules.end_added(); @@ -242,7 +243,7 @@ void MCJIT::finalizeObject() { } void MCJIT::finalizeModule(Module *M) { - std::lock_guard locked(lock); + MutexGuard locked(lock); // This must be a module which has already been added to this MCJIT instance. assert(OwnedModules.ownsModule(M) && "MCJIT::finalizeModule: Unknown module."); @@ -267,7 +268,7 @@ uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { Module *MCJIT::findModuleForSymbol(const std::string &Name, bool CheckFunctionsOnly) { - std::lock_guard locked(lock); + MutexGuard locked(lock); // If it hasn't already been generated, see if it's in one of our modules. for (ModulePtrSet::iterator I = OwnedModules.begin_added(), @@ -291,7 +292,7 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name, uint64_t MCJIT::getSymbolAddress(const std::string &Name, bool CheckFunctionsOnly) { - std::lock_guard locked(lock); + MutexGuard locked(lock); // First, check to see if we already have this symbol. 
uint64_t Addr = getExistingSymbolAddress(Name); @@ -335,7 +336,7 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, } uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { - std::lock_guard locked(lock); + MutexGuard locked(lock); uint64_t Result = getSymbolAddress(Name, false); if (Result != 0) finalizeLoadedModules(); @@ -343,7 +344,7 @@ uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { } uint64_t MCJIT::getFunctionAddress(const std::string &Name) { - std::lock_guard locked(lock); + MutexGuard locked(lock); uint64_t Result = getSymbolAddress(Name, true); if (Result != 0) finalizeLoadedModules(); @@ -352,7 +353,7 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) { // Deprecated. Use getFunctionAddress instead. void *MCJIT::getPointerToFunction(Function *F) { - std::lock_guard locked(lock); + MutexGuard locked(lock); if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); @@ -551,13 +552,13 @@ void *MCJIT::getPointerToNamedFunction(const std::string &Name, void MCJIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - std::lock_guard locked(lock); + MutexGuard locked(lock); EventListeners.push_back(L); } void MCJIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - std::lock_guard locked(lock); + MutexGuard locked(lock); SmallVector::reverse_iterator I= std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { @@ -566,14 +567,14 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) { } } void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) { - std::lock_guard locked(lock); + MutexGuard locked(lock); MemMgr.notifyObjectLoaded(this, &Obj); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyObjectEmitted(Obj); } } void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) { - std::lock_guard locked(lock); + MutexGuard locked(lock); for (unsigned I 
= 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFreeingObject(Obj); } diff --git a/unittests/IR/ValueMapTest.cpp b/unittests/IR/ValueMapTest.cpp index ef8258882a64..0b7198fc6ecb 100644 --- a/unittests/IR/ValueMapTest.cpp +++ b/unittests/IR/ValueMapTest.cpp @@ -186,19 +186,19 @@ struct LockMutex : ValueMapConfig { }; static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) { *Data.CalledRAUW = true; - EXPECT_FALSE(Data.M->try_lock()) << "Mutex should already be locked."; + EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked."; } static void onDelete(const ExtraData &Data, KeyT Old) { *Data.CalledDeleted = true; - EXPECT_FALSE(Data.M->try_lock()) << "Mutex should already be locked."; + EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked."; } static MutexT *getMutex(const ExtraData &Data) { return Data.M; } }; #if LLVM_ENABLE_THREADS TYPED_TEST(ValueMapTest, LocksMutex) { - std::mutex M; // Not recursive. + sys::Mutex M(false); // Not recursive. bool CalledRAUW = false, CalledDeleted = false; - typedef LockMutex ConfigType; + typedef LockMutex ConfigType; typename ConfigType::ExtraData Data = {&M, &CalledRAUW, &CalledDeleted}; ValueMap VM(Data); VM[this->BitcastV.get()] = 7; From 85fdfe8efcb582f4d6d564a85c5c6a37a97f9787 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 21:15:27 +0000 Subject: [PATCH 0103/1155] Don't produce eh_frame relocations when targeting the IOS simulator. First step for fixing pr19185. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211404 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 5 +++-- test/MC/MachO/eh-frame-reloc.s | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 5a3f28c3f706..d2e8caf728c5 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -74,8 +74,9 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { // FIXME: this should not depend on the target OS version, but on the ld64 // version in use. From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified - // FDE relocs may be used. - DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6); + // FDE relocs may be used. We also use them for the ios simulator. + DwarfFDESymbolsUseAbsDiff = (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) + || T.isiOS(); UseIntegratedAssembler = true; } diff --git a/test/MC/MachO/eh-frame-reloc.s b/test/MC/MachO/eh-frame-reloc.s index 1b1c674afdb4..66ff571eac89 100644 --- a/test/MC/MachO/eh-frame-reloc.s +++ b/test/MC/MachO/eh-frame-reloc.s @@ -1,5 +1,6 @@ // RUN: llvm-mc < %s -triple=x86_64-apple-macosx10.7 -filetype=obj | llvm-readobj -r | FileCheck %s // RUN: llvm-mc < %s -triple=x86_64-apple-macosx10.6 -filetype=obj | llvm-readobj -r | FileCheck %s +// RUN: llvm-mc < %s -triple=x86_64-apple-ios7.0.0 -filetype=obj | llvm-readobj -r | FileCheck %s // RUN: llvm-mc < %s -triple=x86_64-apple-macosx10.5 -filetype=obj | llvm-readobj -r | FileCheck --check-prefix=OLD64 %s // RUN: llvm-mc < %s -triple=i686-apple-macosx10.6 -filetype=obj | llvm-readobj -r | FileCheck %s // RUN: llvm-mc < %s -triple=i686-apple-macosx10.5 -filetype=obj | llvm-readobj -r | FileCheck --check-prefix=OLD32 %s From efc447827d22c67dbd6aaff6f4160e710583c606 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Fri, 20 Jun 2014 21:29:27 +0000 Subject: 
[PATCH 0104/1155] Fix some double printing of filenames for archives in llvm-nm when the tool is given multiple files. Also fix the same issue with Mach-O universal files. And fix the newline spacing to separate the output in these cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211405 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Object/archive-symtab.test | 6 ++++-- tools/llvm-nm/llvm-nm.cpp | 23 +++++++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/test/Object/archive-symtab.test b/test/Object/archive-symtab.test index 88c9c989dcf6..5e5292c86e19 100644 --- a/test/Object/archive-symtab.test +++ b/test/Object/archive-symtab.test @@ -12,7 +12,8 @@ CHECK: trivial-object-test.elf-x86-64: CHECK-NEXT: U SomeOtherFunction CHECK-NEXT: 0000000000000000 T main CHECK-NEXT: U puts -CHECK-NEXT: trivial-object-test2.elf-x86-64: + +CHECK: trivial-object-test2.elf-x86-64: CHECK-NEXT: 0000000000000000 t bar CHECK-NEXT: 0000000000000006 T foo CHECK-NEXT: 0000000000000016 T main @@ -40,7 +41,8 @@ CORRUPT: trivial-object-test.elf-x86-64: CORRUPT-NEXT: U SomeOtherFunction CORRUPT-NEXT: 0000000000000000 T main CORRUPT-NEXT: U puts -CORRUPT-NEXT: trivial-object-test2.elf-x86-64: + +CORRUPT: trivial-object-test2.elf-x86-64: CORRUPT-NEXT: 0000000000000000 t bar CORRUPT-NEXT: 0000000000000006 T foo CORRUPT-NEXT: 0000000000000016 T main diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index cccddb0736d1..5221d4d18d9a 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -370,7 +370,7 @@ static void darwinPrintSymbol(MachOObjectFile *MachO, SymbolListT::iterator I, outs() << "\n"; } -static void sortAndPrintSymbolList(SymbolicFile *Obj) { +static void sortAndPrintSymbolList(SymbolicFile *Obj, bool printName) { if (!NoSort) { if (NumericSort) std::sort(SymbolList.begin(), SymbolList.end(), compareSymbolAddress); @@ -380,9 +380,9 @@ static void sortAndPrintSymbolList(SymbolicFile *Obj) { 
std::sort(SymbolList.begin(), SymbolList.end(), compareSymbolName); } - if (OutputFormat == posix && MultipleFiles) { + if (OutputFormat == posix && MultipleFiles && printName) { outs() << '\n' << CurrentFilename << ":\n"; - } else if (OutputFormat == bsd && MultipleFiles) { + } else if (OutputFormat == bsd && MultipleFiles && printName) { outs() << "\n" << CurrentFilename << ":\n"; } else if (OutputFormat == sysv) { outs() << "\n\nSymbols from " << CurrentFilename << ":\n\n" @@ -664,7 +664,7 @@ static char getNMTypeChar(SymbolicFile *Obj, basic_symbol_iterator I) { return Ret; } -static void dumpSymbolNamesFromObject(SymbolicFile *Obj) { +static void dumpSymbolNamesFromObject(SymbolicFile *Obj, bool printName) { basic_symbol_iterator IBegin = Obj->symbol_begin(); basic_symbol_iterator IEnd = Obj->symbol_end(); if (DynamicSyms) { @@ -717,7 +717,7 @@ static void dumpSymbolNamesFromObject(SymbolicFile *Obj) { } CurrentFilename = Obj->getFileName(); - sortAndPrintSymbolList(Obj); + sortAndPrintSymbolList(Obj, printName); } static void dumpSymbolNamesFromFile(std::string &Filename) { @@ -757,12 +757,13 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { if (ChildOrErr.getError()) continue; if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { + outs() << "\n"; if (isa(O)) { outs() << Filename << "(" << O->getFileName() << ")"; } else outs() << O->getFileName(); outs() << ":\n"; - dumpSymbolNamesFromObject(O); + dumpSymbolNamesFromObject(O, false); } } return; @@ -775,11 +776,13 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { std::unique_ptr Obj; std::unique_ptr A; if (!I->getAsObjectFile(Obj)) { + if (moreThanOneArch) + outs() << "\n"; outs() << Obj->getFileName(); if (isa(Obj.get()) && moreThanOneArch) outs() << " (for architecture " << I->getArchTypeName() << ")"; outs() << ":\n"; - dumpSymbolNamesFromObject(Obj.get()); + dumpSymbolNamesFromObject(Obj.get(), false); } else if (!I->getAsArchive(A)) { for (Archive::child_iterator AI = 
A->child_begin(), AE = A->child_end(); @@ -789,7 +792,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { if (ChildOrErr.getError()) continue; if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { - outs() << A->getFileName(); + outs() << "\n" << A->getFileName(); if (isa(O)) { outs() << "(" << O->getFileName() << ")"; if (moreThanOneArch) @@ -797,7 +800,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { } else outs() << ":" << O->getFileName(); outs() << ":\n"; - dumpSymbolNamesFromObject(O); + dumpSymbolNamesFromObject(O, false); } } } @@ -805,7 +808,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { return; } if (SymbolicFile *O = dyn_cast(Bin.get())) { - dumpSymbolNamesFromObject(O); + dumpSymbolNamesFromObject(O, true); return; } error("unrecognizable file type", Filename); From 2d3fb0a1710e88b49b5c254a9e85e560b7eb21f4 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 20 Jun 2014 21:43:20 +0000 Subject: [PATCH 0105/1155] Support: Mark end of namespaces This convinces clang-format to leave a newline. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211406 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/ScaledNumber.h | 5 +++-- unittests/Support/ScaledNumberTest.cpp | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index f6594ec6aa55..b5a1168f67af 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -97,8 +97,9 @@ inline std::pair getAdjusted64(uint64_t Digits, int16_t Scale = 0) { return getAdjusted(Digits, Scale); } -} -} + +} // end namespace ScaledNumbers +} // end namespace llvm #endif diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp index d1277d0479a6..e6d24d4512f2 100644 --- a/unittests/Support/ScaledNumberTest.cpp +++ b/unittests/Support/ScaledNumberTest.cpp @@ -78,4 +78,5 @@ TEST(FloatsTest, getAdjusted) { EXPECT_EQ(getAdjusted64(Max32In64 + 1), SP64(Max32In64 + 1, 0)); EXPECT_EQ(getAdjusted64(UINT64_MAX), SP64(UINT64_MAX, 0)); } -} + +} // end namespace From 97eb7882031bbfbef6e78af4d64a66494ff6750b Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 20 Jun 2014 21:44:36 +0000 Subject: [PATCH 0106/1155] Support: Cleanup ScaledNumber::getAdjusted() doc git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211407 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/ScaledNumber.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index b5a1168f67af..8b7c5d0c4a04 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -67,10 +67,7 @@ inline std::pair getRounded64(uint64_t Digits, int16_t Scale, /// \brief Adjust a 64-bit scaled number down to the appropriate width. /// -/// Adjust a soft float with 64-bits of digits down, keeping as much -/// information as possible, and rounding up on half. 
-/// -/// \pre Adding 1 to \c Scale will not overflow INT16_MAX. +/// \pre Adding 64 to \c Scale will not overflow INT16_MAX. template inline std::pair getAdjusted(uint64_t Digits, int16_t Scale = 0) { From 67291098a60fbd8dd81be78385b1c88279a3dbc8 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 20 Jun 2014 21:47:47 +0000 Subject: [PATCH 0107/1155] Support: Write ScaledNumber::getQuotient() and getProduct() git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211409 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 51 +------- include/llvm/Support/ScaledNumber.h | 76 +++++++++++ lib/Analysis/BlockFrequencyInfoImpl.cpp | 91 -------------- lib/Support/CMakeLists.txt | 1 + lib/Support/ScaledNumber.cpp | 119 ++++++++++++++++++ unittests/Support/ScaledNumberTest.cpp | 112 +++++++++++++++++ 6 files changed, 314 insertions(+), 136 deletions(-) create mode 100644 lib/Support/ScaledNumber.cpp diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 264aff372581..df4ebcad334f 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -77,9 +77,6 @@ class UnsignedFloatBase { return Lg.first + (Lg.second < 0); } - static std::pair divide64(uint64_t L, uint64_t R); - static std::pair multiply64(uint64_t L, uint64_t R); - static int compare(uint64_t L, uint64_t R, int Shift) { assert(Shift >= 0); assert(Shift < 64); @@ -315,8 +312,12 @@ template class UnsignedFloat : UnsignedFloatBase { UnsignedFloat inverse() const { return UnsignedFloat(*this).invert(); } private: - static UnsignedFloat getProduct(DigitsType L, DigitsType R); - static UnsignedFloat getQuotient(DigitsType Dividend, DigitsType Divisor); + static UnsignedFloat getProduct(DigitsType LHS, DigitsType RHS) { + return ScaledNumbers::getProduct(LHS, RHS); + } + static UnsignedFloat getQuotient(DigitsType Dividend, DigitsType Divisor) { + return 
ScaledNumbers::getQuotient(Dividend, Divisor); + } std::pair lgImpl() const; static int countLeadingZerosWidth(DigitsType Digits) { @@ -399,46 +400,6 @@ uint64_t UnsignedFloat::scale(uint64_t N) const { return UnsignedFloat(Digits, Exponent).scale(N); } -template -UnsignedFloat UnsignedFloat::getProduct(DigitsType L, - DigitsType R) { - // Check for zero. - if (!L || !R) - return getZero(); - - // Check for numbers that we can compute with 64-bit math. - if (Width <= 32 || (L <= UINT32_MAX && R <= UINT32_MAX)) - return adjustToWidth(uint64_t(L) * uint64_t(R), 0); - - // Do the full thing. - return UnsignedFloat(multiply64(L, R)); -} -template -UnsignedFloat UnsignedFloat::getQuotient(DigitsType Dividend, - DigitsType Divisor) { - // Check for zero. - if (!Dividend) - return getZero(); - if (!Divisor) - return getLargest(); - - if (Width == 64) - return UnsignedFloat(divide64(Dividend, Divisor)); - - // We can compute this with 64-bit math. - int Shift = countLeadingZeros64(Dividend); - uint64_t Shifted = uint64_t(Dividend) << Shift; - uint64_t Quotient = Shifted / Divisor; - - // If Quotient needs to be shifted, then adjustToWidth will round. - if (Quotient > DigitsLimits::max()) - return adjustToWidth(Quotient, -Shift); - - // Round based on the value of the next bit. - return getRounded(UnsignedFloat(Quotient, -Shift), - Shifted % Divisor >= getHalf(Divisor)); -} - template template IntT UnsignedFloat::toInt() const { diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index 8b7c5d0c4a04..6789b89f7e6b 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -95,6 +95,82 @@ inline std::pair getAdjusted64(uint64_t Digits, return getAdjusted(Digits, Scale); } +/// \brief Multiply two 64-bit integers to create a 64-bit scaled number. +/// +/// Implemented with four 64-bit integer multiplies. 
+std::pair multiply64(uint64_t LHS, uint64_t RHS); + +/// \brief Multiply two 32-bit integers to create a 32-bit scaled number. +/// +/// Implemented with one 64-bit integer multiply. +template +inline std::pair getProduct(DigitsT LHS, DigitsT RHS) { + static_assert(!std::numeric_limits::is_signed, "expected unsigned"); + + if (getWidth() <= 32 || (LHS <= UINT32_MAX && RHS <= UINT32_MAX)) + return getAdjusted(uint64_t(LHS) * RHS); + + return multiply64(LHS, RHS); +} + +/// \brief Convenience helper for 32-bit product. +inline std::pair getProduct32(uint32_t LHS, uint32_t RHS) { + return getProduct(LHS, RHS); +} + +/// \brief Convenience helper for 64-bit product. +inline std::pair getProduct64(uint64_t LHS, uint64_t RHS) { + return getProduct(LHS, RHS); +} + +/// \brief Divide two 64-bit integers to create a 64-bit scaled number. +/// +/// Implemented with long division. +/// +/// \pre \c Dividend and \c Divisor are non-zero. +std::pair divide64(uint64_t Dividend, uint64_t Divisor); + +/// \brief Divide two 32-bit integers to create a 32-bit scaled number. +/// +/// Implemented with one 64-bit integer divide/remainder pair. +/// +/// \pre \c Dividend and \c Divisor are non-zero. +std::pair divide32(uint32_t Dividend, uint32_t Divisor); + +/// \brief Divide two 32-bit numbers to create a 32-bit scaled number. +/// +/// Implemented with one 64-bit integer divide/remainder pair. +/// +/// Returns \c (DigitsT_MAX, INT16_MAX) for divide-by-zero (0 for 0/0). +template +std::pair getQuotient(DigitsT Dividend, DigitsT Divisor) { + static_assert(!std::numeric_limits::is_signed, "expected unsigned"); + static_assert(sizeof(DigitsT) == 4 || sizeof(DigitsT) == 8, + "expected 32-bit or 64-bit digits"); + + // Check for zero. 
+ if (!Dividend) + return std::make_pair(0, 0); + if (!Divisor) + return std::make_pair(std::numeric_limits::max(), INT16_MAX); + + if (getWidth() == 64) + return divide64(Dividend, Divisor); + return divide32(Dividend, Divisor); +} + +/// \brief Convenience helper for 32-bit quotient. +inline std::pair getQuotient32(uint32_t Dividend, + uint32_t Divisor) { + return getQuotient(Dividend, Divisor); +} + +/// \brief Convenience helper for 64-bit quotient. +inline std::pair getQuotient64(uint64_t Dividend, + uint64_t Divisor) { + return getQuotient(Dividend, Divisor); +} + } // end namespace ScaledNumbers } // end namespace llvm diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 87d93a4bd5f9..edf1eca45cc3 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -216,97 +216,6 @@ void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) { << "]"; } -static std::pair -getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) { - if (ShouldRound) - if (!++N) - // Rounding caused an overflow. - return std::make_pair(UINT64_C(1), Shift + 64); - return std::make_pair(N, Shift); -} - -std::pair UnsignedFloatBase::divide64(uint64_t Dividend, - uint64_t Divisor) { - // Input should be sanitized. - assert(Divisor); - assert(Dividend); - - // Minimize size of divisor. - int16_t Shift = 0; - if (int Zeros = countTrailingZeros(Divisor)) { - Shift -= Zeros; - Divisor >>= Zeros; - } - - // Check for powers of two. - if (Divisor == 1) - return std::make_pair(Dividend, Shift); - - // Maximize size of dividend. - if (int Zeros = countLeadingZeros64(Dividend)) { - Shift -= Zeros; - Dividend <<= Zeros; - } - - // Start with the result of a divide. - uint64_t Quotient = Dividend / Divisor; - Dividend %= Divisor; - - // Continue building the quotient with long division. - // - // TODO: continue with largers digits. 
- while (!(Quotient >> 63) && Dividend) { - // Shift Dividend, and check for overflow. - bool IsOverflow = Dividend >> 63; - Dividend <<= 1; - --Shift; - - // Divide. - bool DoesDivide = IsOverflow || Divisor <= Dividend; - Quotient = (Quotient << 1) | uint64_t(DoesDivide); - Dividend -= DoesDivide ? Divisor : 0; - } - - // Round. - if (Dividend >= getHalf(Divisor)) - if (!++Quotient) - // Rounding caused an overflow in Quotient. - return std::make_pair(UINT64_C(1), Shift + 64); - - return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift); -} - -std::pair UnsignedFloatBase::multiply64(uint64_t L, - uint64_t R) { - // Separate into two 32-bit digits (U.L). - uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX; - - // Compute cross products. - uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR; - - // Sum into two 64-bit digits. - uint64_t Upper = P1, Lower = P4; - auto addWithCarry = [&](uint64_t N) { - uint64_t NewLower = Lower + (N << 32); - Upper += (N >> 32) + (NewLower < Lower); - Lower = NewLower; - }; - addWithCarry(P2); - addWithCarry(P3); - - // Check whether the upper digit is empty. - if (!Upper) - return std::make_pair(Lower, 0); - - // Shift as little as possible to maximize precision. - unsigned LeadingZeros = countLeadingZeros64(Upper); - int16_t Shift = 64 - LeadingZeros; - if (LeadingZeros) - Upper = Upper << LeadingZeros | Lower >> Shift; - bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1)); - return getRoundedFloat(Upper, ShouldRound, Shift); -} - //===----------------------------------------------------------------------===// // // BlockMass implementation. 
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 2438d729d8de..354e2003b58d 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -42,6 +42,7 @@ add_llvm_library(LLVMSupport PluginLoader.cpp PrettyStackTrace.cpp Regex.cpp + ScaledNumber.cpp SmallPtrSet.cpp SmallVector.cpp SourceMgr.cpp diff --git a/lib/Support/ScaledNumber.cpp b/lib/Support/ScaledNumber.cpp new file mode 100644 index 000000000000..e7531744b67a --- /dev/null +++ b/lib/Support/ScaledNumber.cpp @@ -0,0 +1,119 @@ +//==- lib/Support/ScaledNumber.cpp - Support for scaled numbers -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of some scaled number algorithms. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ScaledNumber.h" + +using namespace llvm; +using namespace llvm::ScaledNumbers; + +std::pair ScaledNumbers::multiply64(uint64_t LHS, + uint64_t RHS) { + // Separate into two 32-bit digits (U.L). + auto getU = [](uint64_t N) { return N >> 32; }; + auto getL = [](uint64_t N) { return N & UINT32_MAX; }; + uint64_t UL = getU(LHS), LL = getL(LHS), UR = getU(RHS), LR = getL(RHS); + + // Compute cross products. + uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR; + + // Sum into two 64-bit digits. + uint64_t Upper = P1, Lower = P4; + auto addWithCarry = [&](uint64_t N) { + uint64_t NewLower = Lower + (getL(N) << 32); + Upper += getU(N) + (NewLower < Lower); + Lower = NewLower; + }; + addWithCarry(P2); + addWithCarry(P3); + + // Check whether the upper digit is empty. + if (!Upper) + return std::make_pair(Lower, 0); + + // Shift as little as possible to maximize precision. 
+ unsigned LeadingZeros = countLeadingZeros(Upper); + int Shift = 64 - LeadingZeros; + if (LeadingZeros) + Upper = Upper << LeadingZeros | Lower >> Shift; + return getRounded(Upper, Shift, + Shift && (Lower & UINT64_C(1) << (Shift - 1))); +} + +static uint64_t getHalf(uint64_t N) { return (N >> 1) + (N & 1); } + +std::pair ScaledNumbers::divide32(uint32_t Dividend, + uint32_t Divisor) { + assert(Dividend && "expected non-zero dividend"); + assert(Divisor && "expected non-zero divisor"); + + // Use 64-bit math and canonicalize the dividend to gain precision. + uint64_t Dividend64 = Dividend; + int Shift = 0; + if (int Zeros = countLeadingZeros(Dividend64)) { + Shift -= Zeros; + Dividend64 <<= Zeros; + } + uint64_t Quotient = Dividend64 / Divisor; + uint64_t Remainder = Dividend64 % Divisor; + + // If Quotient needs to be shifted, leave the rounding to getAdjusted(). + if (Quotient > UINT32_MAX) + return getAdjusted(Quotient, Shift); + + // Round based on the value of the next bit. + return getRounded(Quotient, Shift, Remainder >= getHalf(Divisor)); +} + +std::pair ScaledNumbers::divide64(uint64_t Dividend, + uint64_t Divisor) { + assert(Dividend && "expected non-zero dividend"); + assert(Divisor && "expected non-zero divisor"); + + // Minimize size of divisor. + int Shift = 0; + if (int Zeros = countTrailingZeros(Divisor)) { + Shift -= Zeros; + Divisor >>= Zeros; + } + + // Check for powers of two. + if (Divisor == 1) + return std::make_pair(Dividend, Shift); + + // Maximize size of dividend. + if (int Zeros = countLeadingZeros(Dividend)) { + Shift -= Zeros; + Dividend <<= Zeros; + } + + // Start with the result of a divide. + uint64_t Quotient = Dividend / Divisor; + Dividend %= Divisor; + + // Continue building the quotient with long division. + while (!(Quotient >> 63) && Dividend) { + // Shift Dividend and check for overflow. + bool IsOverflow = Dividend >> 63; + Dividend <<= 1; + --Shift; + + // Get the next bit of Quotient. 
+ Quotient <<= 1; + if (IsOverflow || Divisor <= Dividend) { + Quotient |= 1; + Dividend -= Divisor; + } + } + + return getRounded(Quotient, Shift, Dividend >= getHalf(Divisor)); +} diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp index e6d24d4512f2..6f7cc2a14b46 100644 --- a/unittests/Support/ScaledNumberTest.cpp +++ b/unittests/Support/ScaledNumberTest.cpp @@ -79,4 +79,116 @@ TEST(FloatsTest, getAdjusted) { EXPECT_EQ(getAdjusted64(UINT64_MAX), SP64(UINT64_MAX, 0)); } +TEST(PositiveFloatTest, getProduct) { + // Zero. + EXPECT_EQ(SP32(0, 0), getProduct32(0, 0)); + EXPECT_EQ(SP32(0, 0), getProduct32(0, 1)); + EXPECT_EQ(SP32(0, 0), getProduct32(0, 33)); + + // Basic. + EXPECT_EQ(SP32(6, 0), getProduct32(2, 3)); + EXPECT_EQ(SP32(UINT16_MAX / 3 * UINT16_MAX / 5 * 2, 0), + getProduct32(UINT16_MAX / 3, UINT16_MAX / 5 * 2)); + + // Overflow, no loss of precision. + // ==> 0xf00010 * 0x1001 + // ==> 0xf00f00000 + 0x10010 + // ==> 0xf00f10010 + // ==> 0xf00f1001 * 2^4 + EXPECT_EQ(SP32(0xf00f1001, 4), getProduct32(0xf00010, 0x1001)); + + // Overflow, loss of precision, rounds down. + // ==> 0xf000070 * 0x1001 + // ==> 0xf00f000000 + 0x70070 + // ==> 0xf00f070070 + // ==> 0xf00f0700 * 2^8 + EXPECT_EQ(SP32(0xf00f0700, 8), getProduct32(0xf000070, 0x1001)); + + // Overflow, loss of precision, rounds up. + // ==> 0xf000080 * 0x1001 + // ==> 0xf00f000000 + 0x80080 + // ==> 0xf00f080080 + // ==> 0xf00f0801 * 2^8 + EXPECT_EQ(SP32(0xf00f0801, 8), getProduct32(0xf000080, 0x1001)); + + // Reverse operand order. 
+ EXPECT_EQ(SP32(0, 0), getProduct32(1, 0)); + EXPECT_EQ(SP32(0, 0), getProduct32(33, 0)); + EXPECT_EQ(SP32(6, 0), getProduct32(3, 2)); + EXPECT_EQ(SP32(UINT16_MAX / 3 * UINT16_MAX / 5 * 2, 0), + getProduct32(UINT16_MAX / 5 * 2, UINT16_MAX / 3)); + EXPECT_EQ(SP32(0xf00f1001, 4), getProduct32(0x1001, 0xf00010)); + EXPECT_EQ(SP32(0xf00f0700, 8), getProduct32(0x1001, 0xf000070)); + EXPECT_EQ(SP32(0xf00f0801, 8), getProduct32(0x1001, 0xf000080)); + + // Round to overflow. + EXPECT_EQ(SP64(UINT64_C(1) << 63, 64), + getProduct64(UINT64_C(10376293541461622786), + UINT64_C(16397105843297379211))); + + // Big number with rounding. + EXPECT_EQ(SP64(UINT64_C(9223372036854775810), 64), + getProduct64(UINT64_C(18446744073709551556), + UINT64_C(9223372036854775840))); +} + +TEST(PositiveFloatTest, Divide) { + // Zero. + EXPECT_EQ(SP32(0, 0), getQuotient32(0, 0)); + EXPECT_EQ(SP32(0, 0), getQuotient32(0, 1)); + EXPECT_EQ(SP32(0, 0), getQuotient32(0, 73)); + EXPECT_EQ(SP32(UINT32_MAX, INT16_MAX), getQuotient32(1, 0)); + EXPECT_EQ(SP32(UINT32_MAX, INT16_MAX), getQuotient32(6, 0)); + + // Powers of two. + EXPECT_EQ(SP32(1u << 31, -31), getQuotient32(1, 1)); + EXPECT_EQ(SP32(1u << 31, -30), getQuotient32(2, 1)); + EXPECT_EQ(SP32(1u << 31, -33), getQuotient32(4, 16)); + EXPECT_EQ(SP32(7u << 29, -29), getQuotient32(7, 1)); + EXPECT_EQ(SP32(7u << 29, -30), getQuotient32(7, 2)); + EXPECT_EQ(SP32(7u << 29, -33), getQuotient32(7, 16)); + + // Divide evenly. + EXPECT_EQ(SP32(3u << 30, -30), getQuotient32(9, 3)); + EXPECT_EQ(SP32(9u << 28, -28), getQuotient32(63, 7)); + + // Divide unevenly. + EXPECT_EQ(SP32(0xaaaaaaab, -33), getQuotient32(1, 3)); + EXPECT_EQ(SP32(0xd5555555, -31), getQuotient32(5, 3)); + + // 64-bit division is hard to test, since divide64 doesn't canonicalized its + // output. However, this is the algorithm the implementation uses: + // + // - Shift divisor right. + // - If we have 1 (power of 2), return early -- not canonicalized. + // - Shift dividend left. 
+ // - 64-bit integer divide. + // - If there's a remainder, continue with long division. + // + // TODO: require less knowledge about the implementation in the test. + + // Zero. + EXPECT_EQ(SP64(0, 0), getQuotient64(0, 0)); + EXPECT_EQ(SP64(0, 0), getQuotient64(0, 1)); + EXPECT_EQ(SP64(0, 0), getQuotient64(0, 73)); + EXPECT_EQ(SP64(UINT64_MAX, INT16_MAX), getQuotient64(1, 0)); + EXPECT_EQ(SP64(UINT64_MAX, INT16_MAX), getQuotient64(6, 0)); + + // Powers of two. + EXPECT_EQ(SP64(1, 0), getQuotient64(1, 1)); + EXPECT_EQ(SP64(2, 0), getQuotient64(2, 1)); + EXPECT_EQ(SP64(4, -4), getQuotient64(4, 16)); + EXPECT_EQ(SP64(7, 0), getQuotient64(7, 1)); + EXPECT_EQ(SP64(7, -1), getQuotient64(7, 2)); + EXPECT_EQ(SP64(7, -4), getQuotient64(7, 16)); + + // Divide evenly. + EXPECT_EQ(SP64(UINT64_C(3) << 60, -60), getQuotient64(9, 3)); + EXPECT_EQ(SP64(UINT64_C(9) << 58, -58), getQuotient64(63, 7)); + + // Divide unevenly. + EXPECT_EQ(SP64(0xaaaaaaaaaaaaaaab, -65), getQuotient64(1, 3)); + EXPECT_EQ(SP64(0xd555555555555555, -63), getQuotient64(5, 3)); +} + } // end namespace From b019bb82bebe2c0d7669022ca552525db4ff70f6 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 22:30:31 +0000 Subject: [PATCH 0108/1155] Delete dead code. The compact unwind info is only used by code that knows it is supported. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211412 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 25 ++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index bf30a8e66633..78403424da63 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -717,11 +717,10 @@ class DarwinX86AsmBackend : public X86AsmBackend { }; class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { - bool SupportsCU; public: DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU, bool SupportsCU) - : DarwinX86AsmBackend(T, MRI, CPU, false), SupportsCU(SupportsCU) {} + StringRef CPU) + : DarwinX86AsmBackend(T, MRI, CPU, false) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, @@ -732,20 +731,16 @@ class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { /// \brief Generate the compact unwind encoding for the CFI instructions. uint32_t generateCompactUnwindEncoding( ArrayRef Instrs) const override { - return SupportsCU ? 
generateCompactUnwindEncodingImpl(Instrs) : 0; + return generateCompactUnwindEncodingImpl(Instrs); } }; class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { - bool SupportsCU; const MachO::CPUSubTypeX86 Subtype; public: DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU, bool SupportsCU, - MachO::CPUSubTypeX86 st) - : DarwinX86AsmBackend(T, MRI, CPU, true), SupportsCU(SupportsCU), - Subtype(st) { - } + StringRef CPU, MachO::CPUSubTypeX86 st) + : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, @@ -788,7 +783,7 @@ class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { /// \brief Generate the compact unwind encoding for the CFI instructions. uint32_t generateCompactUnwindEncoding( ArrayRef Instrs) const override { - return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0; + return generateCompactUnwindEncodingImpl(Instrs); } }; @@ -801,9 +796,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, Triple TheTriple(TT); if (TheTriple.isOSBinFormatMachO()) - return new DarwinX86_32AsmBackend(T, MRI, CPU, - TheTriple.isMacOSX() && - !TheTriple.isMacOSXVersionLT(10, 7)); + return new DarwinX86_32AsmBackend(T, MRI, CPU); if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF()) return new WindowsX86AsmBackend(T, false, CPU); @@ -823,9 +816,7 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringSwitch(TheTriple.getArchName()) .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H) .Default(MachO::CPU_SUBTYPE_X86_64_ALL); - return new DarwinX86_64AsmBackend(T, MRI, CPU, - TheTriple.isMacOSX() && - !TheTriple.isMacOSXVersionLT(10, 7), CS); + return new DarwinX86_64AsmBackend(T, MRI, CPU, CS); } if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF()) From 6f5fa4a3638a0e8d6f905bc9681a083391f6ec07 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. 
Exon Smith" Date: Fri, 20 Jun 2014 22:33:40 +0000 Subject: [PATCH 0109/1155] Support: Write ScaledNumbers::getLg{,Floor,Ceiling}() git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211413 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 40 ++------ include/llvm/Support/ScaledNumber.h | 56 +++++++++++ unittests/Support/ScaledNumberTest.cpp | 94 +++++++++++++++++++ 3 files changed, 157 insertions(+), 33 deletions(-) diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index df4ebcad334f..7944af9533ad 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -67,16 +67,6 @@ class UnsignedFloatBase { return IsNeg ? -int64_t(U) : int64_t(U); } - static int32_t extractLg(const std::pair &Lg) { - return Lg.first; - } - static int32_t extractLgFloor(const std::pair &Lg) { - return Lg.first - (Lg.second > 0); - } - static int32_t extractLgCeiling(const std::pair &Lg) { - return Lg.first + (Lg.second < 0); - } - static int compare(uint64_t L, uint64_t R, int Shift) { assert(Shift >= 0); assert(Shift < 64); @@ -195,17 +185,21 @@ template class UnsignedFloat : UnsignedFloatBase { /// \brief The log base 2, rounded. /// /// Get the lg of the scalar. lg 0 is defined to be INT32_MIN. - int32_t lg() const { return extractLg(lgImpl()); } + int32_t lg() const { return ScaledNumbers::getLg(Digits, Exponent); } /// \brief The log base 2, rounded towards INT32_MIN. /// /// Get the lg floor. lg 0 is defined to be INT32_MIN. - int32_t lgFloor() const { return extractLgFloor(lgImpl()); } + int32_t lgFloor() const { + return ScaledNumbers::getLgFloor(Digits, Exponent); + } /// \brief The log base 2, rounded towards INT32_MAX. /// /// Get the lg ceiling. lg 0 is defined to be INT32_MIN. 
- int32_t lgCeiling() const { return extractLgCeiling(lgImpl()); } + int32_t lgCeiling() const { + return ScaledNumbers::getLgCeiling(Digits, Exponent); + } bool operator==(const UnsignedFloat &X) const { return compare(X) == 0; } bool operator<(const UnsignedFloat &X) const { return compare(X) < 0; } @@ -319,7 +313,6 @@ template class UnsignedFloat : UnsignedFloatBase { return ScaledNumbers::getQuotient(Dividend, Divisor); } - std::pair lgImpl() const; static int countLeadingZerosWidth(DigitsType Digits) { if (Width == 64) return countLeadingZeros64(Digits); @@ -421,25 +414,6 @@ IntT UnsignedFloat::toInt() const { return N; } -template -std::pair UnsignedFloat::lgImpl() const { - if (isZero()) - return std::make_pair(INT32_MIN, 0); - - // Get the floor of the lg of Digits. - int32_t LocalFloor = Width - countLeadingZerosWidth(Digits) - 1; - - // Get the floor of the lg of this. - int32_t Floor = Exponent + LocalFloor; - if (Digits == UINT64_C(1) << LocalFloor) - return std::make_pair(Floor, 0); - - // Round based on the next digit. - assert(LocalFloor >= 1); - bool Round = Digits & UINT64_C(1) << (LocalFloor - 1); - return std::make_pair(Floor + Round, Round ? 1 : -1); -} - template UnsignedFloat UnsignedFloat::matchExponents(UnsignedFloat X) { if (isZero() || X.isZero() || Exponent == X.Exponent) diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index 6789b89f7e6b..6d563fba1ee0 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -171,6 +171,62 @@ inline std::pair getQuotient64(uint64_t Dividend, return getQuotient(Dividend, Divisor); } +/// \brief Implementation of getLg() and friends. +/// +/// Returns the rounded lg of \c Digits*2^Scale and an int specifying whether +/// this was rounded up (1), down (-1), or exact (0). +/// +/// Returns \c INT32_MIN when \c Digits is zero. 
+template +inline std::pair getLgImpl(DigitsT Digits, int16_t Scale) { + static_assert(!std::numeric_limits::is_signed, "expected unsigned"); + + if (!Digits) + return std::make_pair(INT32_MIN, 0); + + // Get the floor of the lg of Digits. + int32_t LocalFloor = sizeof(Digits) * 8 - countLeadingZeros(Digits) - 1; + + // Get the actual floor. + int32_t Floor = Scale + LocalFloor; + if (Digits == UINT64_C(1) << LocalFloor) + return std::make_pair(Floor, 0); + + // Round based on the next digit. + assert(LocalFloor >= 1); + bool Round = Digits & UINT64_C(1) << (LocalFloor - 1); + return std::make_pair(Floor + Round, Round ? 1 : -1); +} + +/// \brief Get the lg (rounded) of a scaled number. +/// +/// Get the lg of \c Digits*2^Scale. +/// +/// Returns \c INT32_MIN when \c Digits is zero. +template int32_t getLg(DigitsT Digits, int16_t Scale) { + return getLgImpl(Digits, Scale).first; +} + +/// \brief Get the lg floor of a scaled number. +/// +/// Get the floor of the lg of \c Digits*2^Scale. +/// +/// Returns \c INT32_MIN when \c Digits is zero. +template int32_t getLgFloor(DigitsT Digits, int16_t Scale) { + auto Lg = getLgImpl(Digits, Scale); + return Lg.first - (Lg.second > 0); +} + +/// \brief Get the lg ceiling of a scaled number. +/// +/// Get the ceiling of the lg of \c Digits*2^Scale. +/// +/// Returns \c INT32_MIN when \c Digits is zero. 
+template int32_t getLgCeiling(DigitsT Digits, int16_t Scale) { + auto Lg = getLgImpl(Digits, Scale); + return Lg.first + (Lg.second < 0); +} + } // end namespace ScaledNumbers } // end namespace llvm diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp index 6f7cc2a14b46..db8101cc26b1 100644 --- a/unittests/Support/ScaledNumberTest.cpp +++ b/unittests/Support/ScaledNumberTest.cpp @@ -191,4 +191,98 @@ TEST(PositiveFloatTest, Divide) { EXPECT_EQ(SP64(0xd555555555555555, -63), getQuotient64(5, 3)); } +TEST(ScaledNumbersHelpersTest, getLg) { + EXPECT_EQ(0, getLg(UINT32_C(1), 0)); + EXPECT_EQ(1, getLg(UINT32_C(1), 1)); + EXPECT_EQ(1, getLg(UINT32_C(2), 0)); + EXPECT_EQ(3, getLg(UINT32_C(1), 3)); + EXPECT_EQ(3, getLg(UINT32_C(7), 0)); + EXPECT_EQ(3, getLg(UINT32_C(8), 0)); + EXPECT_EQ(3, getLg(UINT32_C(9), 0)); + EXPECT_EQ(3, getLg(UINT32_C(64), -3)); + EXPECT_EQ(31, getLg((UINT32_MAX >> 1) + 2, 0)); + EXPECT_EQ(32, getLg(UINT32_MAX, 0)); + EXPECT_EQ(-1, getLg(UINT32_C(1), -1)); + EXPECT_EQ(-1, getLg(UINT32_C(2), -2)); + EXPECT_EQ(INT32_MIN, getLg(UINT32_C(0), -1)); + EXPECT_EQ(INT32_MIN, getLg(UINT32_C(0), 0)); + EXPECT_EQ(INT32_MIN, getLg(UINT32_C(0), 1)); + + EXPECT_EQ(0, getLg(UINT64_C(1), 0)); + EXPECT_EQ(1, getLg(UINT64_C(1), 1)); + EXPECT_EQ(1, getLg(UINT64_C(2), 0)); + EXPECT_EQ(3, getLg(UINT64_C(1), 3)); + EXPECT_EQ(3, getLg(UINT64_C(7), 0)); + EXPECT_EQ(3, getLg(UINT64_C(8), 0)); + EXPECT_EQ(3, getLg(UINT64_C(9), 0)); + EXPECT_EQ(3, getLg(UINT64_C(64), -3)); + EXPECT_EQ(63, getLg((UINT64_MAX >> 1) + 2, 0)); + EXPECT_EQ(64, getLg(UINT64_MAX, 0)); + EXPECT_EQ(-1, getLg(UINT64_C(1), -1)); + EXPECT_EQ(-1, getLg(UINT64_C(2), -2)); + EXPECT_EQ(INT32_MIN, getLg(UINT64_C(0), -1)); + EXPECT_EQ(INT32_MIN, getLg(UINT64_C(0), 0)); + EXPECT_EQ(INT32_MIN, getLg(UINT64_C(0), 1)); +} + +TEST(ScaledNumbersHelpersTest, getLgFloor) { + EXPECT_EQ(0, getLgFloor(UINT32_C(1), 0)); + EXPECT_EQ(1, getLgFloor(UINT32_C(1), 1)); + EXPECT_EQ(1, 
getLgFloor(UINT32_C(2), 0)); + EXPECT_EQ(2, getLgFloor(UINT32_C(7), 0)); + EXPECT_EQ(3, getLgFloor(UINT32_C(1), 3)); + EXPECT_EQ(3, getLgFloor(UINT32_C(8), 0)); + EXPECT_EQ(3, getLgFloor(UINT32_C(9), 0)); + EXPECT_EQ(3, getLgFloor(UINT32_C(64), -3)); + EXPECT_EQ(31, getLgFloor((UINT32_MAX >> 1) + 2, 0)); + EXPECT_EQ(31, getLgFloor(UINT32_MAX, 0)); + EXPECT_EQ(INT32_MIN, getLgFloor(UINT32_C(0), -1)); + EXPECT_EQ(INT32_MIN, getLgFloor(UINT32_C(0), 0)); + EXPECT_EQ(INT32_MIN, getLgFloor(UINT32_C(0), 1)); + + EXPECT_EQ(0, getLgFloor(UINT64_C(1), 0)); + EXPECT_EQ(1, getLgFloor(UINT64_C(1), 1)); + EXPECT_EQ(1, getLgFloor(UINT64_C(2), 0)); + EXPECT_EQ(2, getLgFloor(UINT64_C(7), 0)); + EXPECT_EQ(3, getLgFloor(UINT64_C(1), 3)); + EXPECT_EQ(3, getLgFloor(UINT64_C(8), 0)); + EXPECT_EQ(3, getLgFloor(UINT64_C(9), 0)); + EXPECT_EQ(3, getLgFloor(UINT64_C(64), -3)); + EXPECT_EQ(63, getLgFloor((UINT64_MAX >> 1) + 2, 0)); + EXPECT_EQ(63, getLgFloor(UINT64_MAX, 0)); + EXPECT_EQ(INT32_MIN, getLgFloor(UINT64_C(0), -1)); + EXPECT_EQ(INT32_MIN, getLgFloor(UINT64_C(0), 0)); + EXPECT_EQ(INT32_MIN, getLgFloor(UINT64_C(0), 1)); +} + +TEST(ScaledNumbersHelpersTest, getLgCeiling) { + EXPECT_EQ(0, getLgCeiling(UINT32_C(1), 0)); + EXPECT_EQ(1, getLgCeiling(UINT32_C(1), 1)); + EXPECT_EQ(1, getLgCeiling(UINT32_C(2), 0)); + EXPECT_EQ(3, getLgCeiling(UINT32_C(1), 3)); + EXPECT_EQ(3, getLgCeiling(UINT32_C(7), 0)); + EXPECT_EQ(3, getLgCeiling(UINT32_C(8), 0)); + EXPECT_EQ(3, getLgCeiling(UINT32_C(64), -3)); + EXPECT_EQ(4, getLgCeiling(UINT32_C(9), 0)); + EXPECT_EQ(32, getLgCeiling(UINT32_MAX, 0)); + EXPECT_EQ(32, getLgCeiling((UINT32_MAX >> 1) + 2, 0)); + EXPECT_EQ(INT32_MIN, getLgCeiling(UINT32_C(0), -1)); + EXPECT_EQ(INT32_MIN, getLgCeiling(UINT32_C(0), 0)); + EXPECT_EQ(INT32_MIN, getLgCeiling(UINT32_C(0), 1)); + + EXPECT_EQ(0, getLgCeiling(UINT64_C(1), 0)); + EXPECT_EQ(1, getLgCeiling(UINT64_C(1), 1)); + EXPECT_EQ(1, getLgCeiling(UINT64_C(2), 0)); + EXPECT_EQ(3, getLgCeiling(UINT64_C(1), 3)); + 
EXPECT_EQ(3, getLgCeiling(UINT64_C(7), 0)); + EXPECT_EQ(3, getLgCeiling(UINT64_C(8), 0)); + EXPECT_EQ(3, getLgCeiling(UINT64_C(64), -3)); + EXPECT_EQ(4, getLgCeiling(UINT64_C(9), 0)); + EXPECT_EQ(64, getLgCeiling((UINT64_MAX >> 1) + 2, 0)); + EXPECT_EQ(64, getLgCeiling(UINT64_MAX, 0)); + EXPECT_EQ(INT32_MIN, getLgCeiling(UINT64_C(0), -1)); + EXPECT_EQ(INT32_MIN, getLgCeiling(UINT64_C(0), 0)); + EXPECT_EQ(INT32_MIN, getLgCeiling(UINT64_C(0), 1)); +} + } // end namespace From fb7b9a752280109b381ba220d1d5aa8bbad83079 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 20 Jun 2014 22:36:09 +0000 Subject: [PATCH 0110/1155] Support: ScaledNumber: Fix inconsistent test names git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211414 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/ScaledNumber.h | 1 - unittests/Support/ScaledNumberTest.cpp | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index 6d563fba1ee0..1acc4b1b163f 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -231,4 +231,3 @@ template int32_t getLgCeiling(DigitsT Digits, int16_t Scale) { } // end namespace llvm #endif - diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp index db8101cc26b1..cd3d6fa9c8cd 100644 --- a/unittests/Support/ScaledNumberTest.cpp +++ b/unittests/Support/ScaledNumberTest.cpp @@ -56,7 +56,7 @@ TEST(ScaledNumberHelpersTest, getRounded) { EXPECT_EQ(getRounded64(UINT64_MAX, 0, true), SP64(UINT64_C(1) << 63, 1)); } -TEST(FloatsTest, getAdjusted) { +TEST(ScaledNumberHelpersTest, getAdjusted) { const uint64_t Max32In64 = UINT32_MAX; EXPECT_EQ(getAdjusted32(0), SP32(0, 0)); EXPECT_EQ(getAdjusted32(0, 5), SP32(0, 5)); @@ -79,7 +79,7 @@ TEST(FloatsTest, getAdjusted) { EXPECT_EQ(getAdjusted64(UINT64_MAX), SP64(UINT64_MAX, 0)); } -TEST(PositiveFloatTest, getProduct) { 
+TEST(ScaledNumberHelpersTest, getProduct) { // Zero. EXPECT_EQ(SP32(0, 0), getProduct32(0, 0)); EXPECT_EQ(SP32(0, 0), getProduct32(0, 1)); @@ -132,7 +132,7 @@ TEST(PositiveFloatTest, getProduct) { UINT64_C(9223372036854775840))); } -TEST(PositiveFloatTest, Divide) { +TEST(ScaledNumberHelpersTest, getQuotient) { // Zero. EXPECT_EQ(SP32(0, 0), getQuotient32(0, 0)); EXPECT_EQ(SP32(0, 0), getQuotient32(0, 1)); @@ -191,7 +191,7 @@ TEST(PositiveFloatTest, Divide) { EXPECT_EQ(SP64(0xd555555555555555, -63), getQuotient64(5, 3)); } -TEST(ScaledNumbersHelpersTest, getLg) { +TEST(ScaledNumberHelpersTest, getLg) { EXPECT_EQ(0, getLg(UINT32_C(1), 0)); EXPECT_EQ(1, getLg(UINT32_C(1), 1)); EXPECT_EQ(1, getLg(UINT32_C(2), 0)); @@ -225,7 +225,7 @@ TEST(ScaledNumbersHelpersTest, getLg) { EXPECT_EQ(INT32_MIN, getLg(UINT64_C(0), 1)); } -TEST(ScaledNumbersHelpersTest, getLgFloor) { +TEST(ScaledNumberHelpersTest, getLgFloor) { EXPECT_EQ(0, getLgFloor(UINT32_C(1), 0)); EXPECT_EQ(1, getLgFloor(UINT32_C(1), 1)); EXPECT_EQ(1, getLgFloor(UINT32_C(2), 0)); @@ -255,7 +255,7 @@ TEST(ScaledNumbersHelpersTest, getLgFloor) { EXPECT_EQ(INT32_MIN, getLgFloor(UINT64_C(0), 1)); } -TEST(ScaledNumbersHelpersTest, getLgCeiling) { +TEST(ScaledNumberHelpersTest, getLgCeiling) { EXPECT_EQ(0, getLgCeiling(UINT32_C(1), 0)); EXPECT_EQ(1, getLgCeiling(UINT32_C(1), 1)); EXPECT_EQ(1, getLgCeiling(UINT32_C(2), 0)); From a896f2bddbd51717b15dd13bb1b0ec78081fb04c Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 22:37:01 +0000 Subject: [PATCH 0111/1155] Use a helper function and clang-format. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211415 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCObjectFileInfo.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 4df9221d6c4a..ebc67acf4cc3 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -18,6 +18,22 @@ #include "llvm/MC/MCSectionMachO.h" using namespace llvm; +static bool useCompactUnwind(const Triple &T) { + // Only on darwin. + if (!T.isOSDarwin()) + return false; + + // aarch64 always has it. + if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64) + return true; + + // Use it on newer version of OS X. + if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) + return true; + + return false; +} + void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { // MachO IsFunctionEHFrameSymbolPrivate = false; @@ -151,13 +167,10 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { COFFDebugSymbolsSection = nullptr; - if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) || - (T.isOSDarwin() && - (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))) { + if (useCompactUnwind(T)) { CompactUnwindSection = - Ctx->getMachOSection("__LD", "__compact_unwind", - MachO::S_ATTR_DEBUG, - SectionKind::getReadOnly()); + Ctx->getMachOSection("__LD", "__compact_unwind", MachO::S_ATTR_DEBUG, + SectionKind::getReadOnly()); if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) CompactUnwindDwarfEHFrameOnly = 0x04000000; From 32b14f80c27f86e6d146cf711db5bedda69e831c Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 22:40:55 +0000 Subject: [PATCH 0112/1155] Use compact unwind for the iOS simulator. Another step in fixing pr19185. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211416 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCObjectFileInfo.cpp | 5 +++++ test/MC/X86/no-elf-compact-unwind.s | 1 + 2 files changed, 6 insertions(+) diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index ebc67acf4cc3..971b1a6e40b5 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -31,6 +31,11 @@ static bool useCompactUnwind(const Triple &T) { if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) return true; + // And the iOS simulator. + if (T.isiOS() && + (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)) + return true; + return false; } diff --git a/test/MC/X86/no-elf-compact-unwind.s b/test/MC/X86/no-elf-compact-unwind.s index 017c52ae9f3a..4e9236b87971 100644 --- a/test/MC/X86/no-elf-compact-unwind.s +++ b/test/MC/X86/no-elf-compact-unwind.s @@ -1,4 +1,5 @@ // RUN: llvm-mc < %s -filetype=obj -triple x86_64-apple-macosx10.8.0 | llvm-readobj -s | FileCheck -check-prefix=MACHO %s +// RUN: llvm-mc < %s -filetype=obj -triple x86_64-apple-ios7.0.0 | llvm-readobj -s | FileCheck -check-prefix=MACHO %s // RUN: llvm-mc < %s -filetype=obj -triple x86_64-unknown-linux | llvm-readobj -s | FileCheck -check-prefix=ELF %s .globl __Z3barv From 45fdc3d534113b4b5e6050f5843f7f74be56d2ae Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 23:54:32 +0000 Subject: [PATCH 0113/1155] Always use a temp symbol for CIE. Fixes pr19185. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211423 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 7 +------ test/MC/MachO/pr19185.s | 6 ++++++ 2 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 test/MC/MachO/pr19185.s diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 3bcff86b1c65..c38eb045b949 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -1340,12 +1340,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, const MCObjectFileInfo *MOFI = context.getObjectFileInfo(); bool verboseAsm = streamer.isVerboseAsm(); - MCSymbol *sectionStart; - if (MOFI->isFunctionEHFrameSymbolPrivate() || !IsEH) - sectionStart = context.CreateTempSymbol(); - else - sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum)); - + MCSymbol *sectionStart = context.CreateTempSymbol(); streamer.EmitLabel(sectionStart); CIENum++; diff --git a/test/MC/MachO/pr19185.s b/test/MC/MachO/pr19185.s new file mode 100644 index 000000000000..fb21e51b70eb --- /dev/null +++ b/test/MC/MachO/pr19185.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o %t.o +f: + .cfi_startproc + .cfi_endproc + +EH_frame0: From 9222a2375f1f2cb9528c6d9b32238f6bac434d94 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Sat, 21 Jun 2014 00:24:51 +0000 Subject: [PATCH 0114/1155] Fix the MinGW builder. Apparently std::call_once and std::recursive_mutex are not available on MinGW and breaks the builder. Revert to using a function local static and sys::Mutex just to get the tree green until we figure out a better solution. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211424 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/ManagedStatic.cpp | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index da01195a9df6..b8fb2841e525 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -14,33 +14,26 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Config/config.h" #include "llvm/Support/Atomic.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" #include -#include using namespace llvm; static const ManagedStaticBase *StaticList = nullptr; -// ManagedStatics can get created during execution of static constructors. As a -// result, we cannot use a global static std::mutex object for the lock since it -// may not have been constructed. Instead, we do a call-once initialization of -// a pointer to a mutex. -static std::once_flag MutexInitializationFlag; -static std::recursive_mutex* ManagedStaticMutex = nullptr; - -// Not all supported platforms (in particular VS2012) have thread-safe function -// static initialization, so roll our own. -static std::recursive_mutex& GetManagedStaticMutex() { - std::call_once(MutexInitializationFlag, - []() { ManagedStaticMutex = new std::recursive_mutex(); } ); - - return *ManagedStaticMutex; +static sys::Mutex& getManagedStaticMutex() { + // We need to use a function local static here, since this can get called + // during a static constructor and we need to guarantee that it's initialized + // correctly. 
+ static sys::Mutex ManagedStaticMutex; + return ManagedStaticMutex; } void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { assert(Creator); if (llvm_is_multithreaded()) { - std::lock_guard Lock(GetManagedStaticMutex()); + MutexGuard Lock(getManagedStaticMutex()); if (!Ptr) { void* tmp = Creator(); @@ -90,7 +83,7 @@ void ManagedStaticBase::destroy() const { /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. void llvm::llvm_shutdown() { - std::lock_guard Lock(GetManagedStaticMutex()); + MutexGuard Lock(getManagedStaticMutex()); while (StaticList) StaticList->destroy(); From 5d0ff9c9289fc4f9e99c7b27b7c8b42597cd8765 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Sat, 21 Jun 2014 01:31:15 +0000 Subject: [PATCH 0115/1155] [X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions. This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions from a sequence of "vadd + vsub + blend". Example: /// typedef float float4 __attribute__((ext_vector_type(4))); float4 foo(float4 A, float4 B) { float4 X = A - B; float4 Y = A + B; return (float4){X[0], Y[1], X[2], Y[3]}; } /// Before this patch, (with flag -mcpu=corei7) llc produced the following assembly sequence: movaps %xmm0, %xmm2 addps %xmm1, %xmm2 subps %xmm1, %xmm0 blendps $10, %xmm2, %xmm0 With this patch, we now get a single addsubps %xmm1, %xmm0 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211427 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 46 +++++++++ test/CodeGen/X86/sse3-avx-addsub.ll | 143 ++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 test/CodeGen/X86/sse3-avx-addsub.ll diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 11c3f11f2cdc..a33d2cc83211 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5355,6 +5355,52 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { f128mem, 
SSE_ALU_F64P>, PD; } +// Patterns used to select 'addsub' instructions. +let Predicates = [HasAVX] in { + // Constant 170 corresponds to the binary mask '10101010'. + // When used as a blend mask, it allows selecting eight elements from two + // input vectors as follow: + // - Even-numbered values in the destination are copied from + // the corresponding elements in the first input vector; + // - Odd-numbered values in the destination are copied from + // the corresponding elements in the second input vector. + + def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)), + (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))), + (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>; + + // Constant 10 corresponds to the binary mask '1010'. + // In the two pattens below, constant 10 is used as a blend mask to select + // - the 1st and 3rd element from the first input vector (the 'fsub' node); + // - the 2nd and 4th element from the second input vector (the 'fadd' node). + + def : Pat<(v4f64 (X86Shufp (v4f64 (fsub VR256:$lhs, VR256:$rhs)), + (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))), + (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; + def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)), + (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))), + (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>; + + def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)), + (v2f64 (fsub VR128:$lhs, VR128:$rhs)))), + (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>; +} + +let Predicates = [UseSSE3] in { + // Constant 10 corresponds to the binary mask '1010'. + // In the pattern below, it is used as a blend mask to select: + // - the 1st and 3rd element from the first input vector (the fsub node); + // - the 2nd and 4th element from the second input vector (the fadd node). 
+ + def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)), + (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))), + (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>; + + def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)), + (v2f64 (fsub VR128:$lhs, VR128:$rhs)))), + (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; +} + //===---------------------------------------------------------------------===// // SSE3 Instructions //===---------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll new file mode 100644 index 000000000000..d7874e4145ee --- /dev/null +++ b/test/CodeGen/X86/sse3-avx-addsub.ll @@ -0,0 +1,143 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSE -check-prefix=CHECK +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX -check-prefix=CHECK + +; Test ADDSUB ISel patterns. + +; All the functions below are obtained from the following source: +; +; typedef double double2 __attribute__((ext_vector_type(2))); +; typedef double double4 __attribute__((ext_vector_type(4))); +; typedef float float4 __attribute__((ext_vector_type(4))); +; typedef float float8 __attribute__((ext_vector_type(8))); +; +; float4 test1(float4 A, float4 B) { +; float4 X = A - B; +; float4 Y = A + B; +; return (float4){X[0], Y[1], X[2], Y[3]}; +; } +; +; float8 test2(float8 A, float8 B) { +; float8 X = A - B; +; float8 Y = A + B; +; return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], [7]}; +; } +; +; double4 test3(double4 A, double4 B) { +; double4 X = A - B; +; double4 Y = A + B; +; return (double4){X[0], Y[1], X[2], Y[3]}; +; } +; +; double2 test4(double2 A, double2 B) { +; double2 X = A - B; +; double2 Y = A + B; +; return (double2){X[0], Y[1]}; +; } + +define <4 x float> @test1(<4 x float> %A, <4 x float> %B) { + %sub = fsub <4 x float> %A, %B + %add = fadd <4 x float> %A, %B + %vecinit6 = shufflevector <4 x float> %sub, <4 x 
float> %add, <4 x i32> + ret <4 x float> %vecinit6 +} +; CHECK-LABEL: test1 +; SSE: addsubps +; AVX: vaddsubps +; CHECK-NEXT: ret + + +define <8 x float> @test2(<8 x float> %A, <8 x float> %B) { + %sub = fsub <8 x float> %A, %B + %add = fadd <8 x float> %A, %B + %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> + ret <8 x float> %vecinit14 +} +; CHECK-LABEL: test2 +; SSE: addsubps +; SSE-NEXT: addsubps +; AVX: vaddsubps +; AVX-NOT: vaddsubps +; CHECK: ret + + +define <4 x double> @test3(<4 x double> %A, <4 x double> %B) { + %sub = fsub <4 x double> %A, %B + %add = fadd <4 x double> %A, %B + %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> + ret <4 x double> %vecinit6 +} +; CHECK-LABEL: test3 +; SSE: addsubpd +; SSE: addsubpd +; AVX: vaddsubpd +; AVX-NOT: vaddsubpd +; CHECK: ret + + +define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 { + %add = fadd <2 x double> %A, %B + %sub = fsub <2 x double> %A, %B + %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> + ret <2 x double> %vecinit2 +} +; CHECK-LABEL: test4 +; SSE: addsubpd +; AVX: vaddsubpd +; CHECK-NEXT: ret + + +define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) { + %1 = load <4 x float>* %B + %add = fadd <4 x float> %A, %1 + %sub = fsub <4 x float> %A, %1 + %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> + ret <4 x float> %vecinit6 +} +; CHECK-LABEL: test1b +; SSE: addsubps +; AVX: vaddsubps +; CHECK-NEXT: ret + + +define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) { + %1 = load <8 x float>* %B + %add = fadd <8 x float> %A, %1 + %sub = fsub <8 x float> %A, %1 + %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> + ret <8 x float> %vecinit14 +} +; CHECK-LABEL: test2b +; SSE: addsubps +; SSE-NEXT: addsubps +; AVX: vaddsubps +; AVX-NOT: vaddsubps +; CHECK: ret + + +define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) { + %1 = load <4 x double>* %B + %add = fadd 
<4 x double> %A, %1 + %sub = fsub <4 x double> %A, %1 + %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> + ret <4 x double> %vecinit6 +} +; CHECK-LABEL: test3b +; SSE: addsubpd +; SSE: addsubpd +; AVX: vaddsubpd +; AVX-NOT: vaddsubpd +; CHECK: ret + + +define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) { + %1 = load <2 x double>* %B + %sub = fsub <2 x double> %A, %1 + %add = fadd <2 x double> %A, %1 + %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> + ret <2 x double> %vecinit2 +} +; CHECK-LABEL: test4b +; SSE: addsubpd +; AVX: vaddsubpd +; CHECK-NEXT: ret + From 7921239c411aeeaef55998cc64495aa1ea1a3ea3 Mon Sep 17 00:00:00 2001 From: Richard Trieu Date: Sat, 21 Jun 2014 02:43:02 +0000 Subject: [PATCH 0116/1155] Add back functionality removed in r210497. Instead of asserting, output a message stating that a null pointer was found. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211430 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/IPA/CallGraphSCCPass.cpp | 6 ++++-- lib/Analysis/IVUsers.cpp | 6 ++++-- lib/Analysis/LoopPass.cpp | 6 ++++-- lib/Analysis/RegionPass.cpp | 6 ++++-- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 6 ++++-- lib/IR/Core.cpp | 14 ++++++++++---- lib/Transforms/Instrumentation/DebugIR.cpp | 6 ++++-- 7 files changed, 34 insertions(+), 16 deletions(-) diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 0d9d0ef842c6..730eb7156d1c 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -603,8 +603,10 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override { Out << Banner; for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - assert((*I)->getFunction() && "Expecting non-null Function"); - (*I)->getFunction()->print(Out); + if ((*I)->getFunction()) + (*I)->getFunction()->print(Out); + else + Out << "Printing Function"; } return false; } diff --git a/lib/Analysis/IVUsers.cpp 
b/lib/Analysis/IVUsers.cpp index 0b94238e6631..24655aa002c8 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -287,8 +287,10 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << ")"; } OS << " in "; - assert(UI->getUser() != nullptr && "Expected non-null User"); - UI->getUser()->print(OS); + if (UI->getUser()) + UI->getUser()->print(OS); + else + OS << "Printing User"; OS << '\n'; } } diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 2c6e6e3ffff3..7bd866e73e10 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -45,8 +45,10 @@ class PrintLoopPass : public LoopPass { for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); b != be; ++b) { - assert((*b) != nullptr && "Expecting non-null block"); - (*b)->print(Out); + if (*b) + (*b)->print(Out); + else + Out << "Printing block"; } return false; } diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index d11b3323cac6..71de14450b7e 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -196,8 +196,10 @@ class PrintRegionPass : public RegionPass { bool runOnRegion(Region *R, RGPassManager &RGM) override { Out << Banner; for (const auto &BB : R->blocks()) { - assert(BB != nullptr && "Expecting non-null Block"); - BB->print(Out); + if (BB) + BB->print(Out); + else + Out << "Printing Block"; } return false; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d30588e5d4f1..799ee92d3ad5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1866,8 +1866,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { SmallString<8> StrVal; CFP->getValueAPF().toString(StrVal); - assert(CFP->getType() != nullptr && "Expecting non-null Type"); - CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + if (CFP->getType()) + CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + else + 
AP.OutStreamer.GetCommentOS() << "Printing Type"; AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n'; } diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 68b9baa2baa2..0cb781c2c6b2 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -281,8 +281,11 @@ char *LLVMPrintTypeToString(LLVMTypeRef Ty) { std::string buf; raw_string_ostream os(buf); - assert(unwrap(Ty) != nullptr && "Expecting non-null Type"); - unwrap(Ty)->print(os); + if (unwrap(Ty)) + unwrap(Ty)->print(os); + else + os << "Printing Type"; + os.flush(); return strdup(buf.c_str()); @@ -532,8 +535,11 @@ char* LLVMPrintValueToString(LLVMValueRef Val) { std::string buf; raw_string_ostream os(buf); - assert(unwrap(Val) != nullptr && "Expecting non-null Value"); - unwrap(Val)->print(os); + if (unwrap(Val)) + unwrap(Val)->print(os); + else + os << "Printing Value"; + os.flush(); return strdup(buf.c_str()); diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp index 1bfef574ddb9..f2f1738808be 100644 --- a/lib/Transforms/Instrumentation/DebugIR.cpp +++ b/lib/Transforms/Instrumentation/DebugIR.cpp @@ -352,10 +352,12 @@ class DIUpdater : public InstVisitor { } std::string getTypeName(Type *T) { - assert(T != nullptr && "Expecting non-null Type"); std::string TypeName; raw_string_ostream TypeStream(TypeName); - T->print(TypeStream); + if (T) + T->print(TypeStream); + else + TypeStream << "Printing Type"; TypeStream.flush(); return TypeName; } From b7f1fb47e67af99d65dd1afc55910c8f19b5ec8e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 21 Jun 2014 11:47:18 +0000 Subject: [PATCH 0117/1155] SCEVExpander: Fold constant PHIs harder. The logic below only understands proper IVs. PR20093. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211433 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolutionExpander.cpp | 3 +- .../2014-06-21-congruent-constant.ll | 51 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index b5070434d14d..8c75b0db70f2 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" @@ -1706,7 +1707,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // Fold constant phis. They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. 
- if (Value *V = Phi->hasConstantValue()) { + if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT)) { Phi->replaceAllUsesWith(V); DeadInsts.push_back(Phi); ++NumElim; diff --git a/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll b/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll new file mode 100644 index 000000000000..3df4c210b4ad --- /dev/null +++ b/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll @@ -0,0 +1,51 @@ +; RUN: opt -S -loop-unswitch -instcombine -indvars < %s | FileCheck %s + +@c = external global i32**, align 8 + +define void @test1() { +entry: + br i1 undef, label %for.end12, label %for.cond.preheader + +for.cond.preheader: ; preds = %entry + %0 = load i32*** @c, align 8 + %1 = load i32** %0, align 8 + %2 = load i32* %1, align 4 + br label %for.body + +for.body: ; preds = %for.cond.backedge, %for.body9.us, %for.cond.preheader + %3 = phi i32* [ %1, %for.cond.preheader ], [ %3, %for.cond.backedge ], [ %6, %for.body9.us ] + %4 = phi i32 [ %2, %for.cond.preheader ], [ undef, %for.cond.backedge ], [ %7, %for.body9.us ] + %i.024 = phi i32 [ 0, %for.cond.preheader ], [ %inc, %for.cond.backedge ], [ 0, %for.body9.us ] + %tobool1 = icmp eq i32 %4, 0 + br i1 %tobool1, label %if.end, label %for.cond.backedge + +if.end: ; preds = %for.body + %5 = load i32* %3, align 4 + %tobool4 = icmp eq i32 %5, 0 + br i1 %tobool4, label %for.cond3, label %for.body9.preheader + +for.body9.preheader: ; preds = %if.end + %tobool8 = icmp eq i32 %i.024, 1 + br i1 %tobool8, label %for.body9.us, label %for.body9 + +for.body9.us: ; preds = %for.body9.preheader + %6 = load i32** undef, align 8 + %7 = load i32* %6, align 4 + br label %for.body + +for.cond3: ; preds = %for.cond3, %if.end + br label %for.cond3 + +for.body9: ; preds = %for.body9, %for.body9.preheader + br label %for.body9 + +for.cond.backedge: ; preds = %for.body + %inc = add nsw i32 %i.024, 1 + br i1 false, label %for.body, label %for.end12 + +for.end12: ; preds = 
%for.cond.backedge, %entry + ret void + +; CHECK-LABEL: @test1 +; CHECK-NOT: phi +} From 636a9bece43b3f10c27fb1260467fdc3cf989ea1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 21 Jun 2014 12:56:42 +0000 Subject: [PATCH 0118/1155] Legalizer: Add support for splitting insert_subvectors. We handle this by spilling the whole thing to the stack and doing the insertion as a store. PR19492. This happens in real code because the vectorizer creates v2i128 when AVX is enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211435 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 38 +++++++++++++++++++ test/CodeGen/X86/vec_split.ll | 33 ++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 04c200c9528c..6ef2d7f0bc93 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -570,6 +570,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 368eba396603..f8b1f3e702c0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -545,6 +545,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case 
ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; @@ -765,6 +766,43 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, TLI.getVectorIdxTy())); } +void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + GetSplitVector(Vec, Lo, Hi); + + // Spill the vector to the stack. + EVT VecVT = Vec.getValueType(); + EVT SubVecVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Store the new subvector into the specified index. + SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(), + false, false, 0); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = + DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, StackPtr.getValueType())); + + // Load the Hi part from the stack slot. 
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, MinAlign(Alignment, IncrementSize)); +} + void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); diff --git a/test/CodeGen/X86/vec_split.ll b/test/CodeGen/X86/vec_split.ll index f9e7c20ba4e2..bc2c6633f20d 100644 --- a/test/CodeGen/X86/vec_split.ll +++ b/test/CodeGen/X86/vec_split.ll @@ -40,3 +40,36 @@ define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) { %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b ret <32 x i16> %2 } + +; PR19492 +define i128 @split128(<2 x i128> %a, <2 x i128> %b) { +; SSE4-LABEL: split128: +; SSE4: addq +; SSE4: adcq +; SSE4: addq +; SSE4: adcq +; SSE4: addq +; SSE4: adcq +; SSE4: ret +; AVX1-LABEL: split128: +; AVX1: addq +; AVX1: adcq +; AVX1: addq +; AVX1: adcq +; AVX1: addq +; AVX1: adcq +; AVX1: ret +; AVX2-LABEL: split128: +; AVX2: addq +; AVX2: adcq +; AVX2: addq +; AVX2: adcq +; AVX2: addq +; AVX2: adcq +; AVX2: ret + %add = add nsw <2 x i128> %a, %b + %rdx.shuf = shufflevector <2 x i128> %add, <2 x i128> undef, <2 x i32> + %bin.rdx = add <2 x i128> %add, %rdx.shuf + %e = extractelement <2 x i128> %bin.rdx, i32 1 + ret i128 %e +} From 6b7ff6be9c1bcf8ce440c7f1c7646fbf059562e4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 21 Jun 2014 13:46:25 +0000 Subject: [PATCH 0119/1155] LoopUnrollRuntime: Check for overflow in the trip count calculation. Fixes PR19823. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211436 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 23 +++++++++++----------- test/Transforms/LoopUnroll/runtime-loop.ll | 6 ++++++ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 5bef091a499b..a96c46ad63e0 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -280,17 +280,17 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, SCEVExpander Expander(*SE, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); - Type *CountTy = TripCount->getType(); - BinaryOperator *ModVal = - BinaryOperator::CreateURem(TripCount, - ConstantInt::get(CountTy, Count), - "xtraiter"); - ModVal->insertBefore(PreHeaderBR); - - // Check if for no extra iterations, then jump to unrolled loop - Value *BranchVal = new ICmpInst(PreHeaderBR, - ICmpInst::ICMP_NE, ModVal, - ConstantInt::get(CountTy, 0), "lcmp"); + + IRBuilder<> B(PreHeaderBR); + Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); + + // Check if for no extra iterations, then jump to unrolled loop. We have to + // check that the trip count computation didn't overflow when adding one to + // the backedge taken count. 
+ Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod"); + Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow"); + Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or"); + // Branch to either the extra iterations or the unrolled loop // We will fix up the true branch label when adding loop body copies BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); @@ -344,6 +344,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, } // The comparison w/ the extra iteration value and branch + Type *CountTy = TripCount->getType(); Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, ConstantInt::get(CountTy, leftOverIters), "un.tmp"); diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll index d8bbea9f1073..a14087dcdce7 100644 --- a/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/test/Transforms/LoopUnroll/runtime-loop.ll @@ -2,6 +2,12 @@ ; Tests for unrolling loops with run-time trip counts +; CHECK: %xtraiter = and i32 %n +; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; CHECK: %lcmp.overflow = icmp eq i32 %n, 0 +; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod +; CHECK: br i1 %lcmp.or, label %unr.cmp + ; CHECK: unr.cmp{{.*}}: ; CHECK: for.body.unr{{.*}}: ; CHECK: for.body: From 23a7bfce977e70673dbd512926136a385e9d66e3 Mon Sep 17 00:00:00 2001 From: Stepan Dyatkovskiy Date: Sat, 21 Jun 2014 17:55:51 +0000 Subject: [PATCH 0120/1155] MergeFunctions Pass, introduced total ordering among top-level comparison methods. Patch changes return type of FunctionComparator::compare() and FunctionComparator::compare(const BasicBlock*, const BasicBlock*) methods from bool (equal or not) to {-1, 0, 1} (less, equal, great). This patch belongs to patch series that improves MergeFunctions performance time from O(N*N) to O(N*log(N)). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211437 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 173 ++++++++++++++------------ 1 file changed, 94 insertions(+), 79 deletions(-) diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index c66df600700b..49dcb39780a0 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -167,14 +167,14 @@ class FunctionComparator { public: FunctionComparator(const DataLayout *DL, const Function *F1, const Function *F2) - : F1(F1), F2(F2), DL(DL) {} + : FnL(F1), FnR(F2), DL(DL) {} /// Test whether the two functions have equivalent behaviour. - bool compare(); + int compare(); private: /// Test whether two basic blocks have equivalent behaviour. - bool compare(const BasicBlock *BB1, const BasicBlock *BB2); + int compare(const BasicBlock *BBL, const BasicBlock *BBR); /// Constants comparison. /// Its analog to lexicographical comparison between hypothetical numbers @@ -411,7 +411,7 @@ class FunctionComparator { int cmpAttrs(const AttributeSet L, const AttributeSet R) const; // The two functions undergoing comparison. - const Function *F1, *F2; + const Function *FnL, *FnR; const DataLayout *DL; @@ -922,13 +922,13 @@ int FunctionComparator::cmpGEP(const GEPOperator *GEPL, /// See comments in declaration for more details. int FunctionComparator::cmpValues(const Value *L, const Value *R) { // Catch self-reference case. - if (L == F1) { - if (R == F2) + if (L == FnL) { + if (R == FnR) return 0; return -1; } - if (R == F2) { - if (L == F1) + if (R == FnR) { + if (L == FnL) return 0; return 1; } @@ -962,90 +962,102 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) { return cmpNumbers(LeftSN.first->second, RightSN.first->second); } // Test whether two basic blocks have equivalent behaviour. 
-bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) { - BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end(); - BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end(); +int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { + BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); + BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); do { - if (!enumerate(F1I, F2I)) - return false; + if (int Res = cmpValues(InstL, InstR)) + return Res; - if (const GetElementPtrInst *GEP1 = dyn_cast(F1I)) { - const GetElementPtrInst *GEP2 = dyn_cast(F2I); - if (!GEP2) - return false; + const GetElementPtrInst *GEPL = dyn_cast(InstL); + const GetElementPtrInst *GEPR = dyn_cast(InstR); - if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) - return false; + if (GEPL && !GEPR) + return 1; + if (GEPR && !GEPL) + return -1; - if (!isEquivalentGEP(GEP1, GEP2)) - return false; + if (GEPL && GEPR) { + if (int Res = + cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) + return Res; + if (int Res = cmpGEP(GEPL, GEPR)) + return Res; } else { - if (!isEquivalentOperation(F1I, F2I)) - return false; - - assert(F1I->getNumOperands() == F2I->getNumOperands()); - for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) { - Value *OpF1 = F1I->getOperand(i); - Value *OpF2 = F2I->getOperand(i); - - if (!enumerate(OpF1, OpF2)) - return false; + if (int Res = cmpOperation(InstL, InstR)) + return Res; + assert(InstL->getNumOperands() == InstR->getNumOperands()); - if (OpF1->getValueID() != OpF2->getValueID() || - !isEquivalentType(OpF1->getType(), OpF2->getType())) - return false; + for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) { + Value *OpL = InstL->getOperand(i); + Value *OpR = InstR->getOperand(i); + if (int Res = cmpValues(OpL, OpR)) + return Res; + if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID())) + return Res; + // TODO: Already 
checked in cmpOperation + if (int Res = cmpType(OpL->getType(), OpR->getType())) + return Res; } } - ++F1I, ++F2I; - } while (F1I != F1E && F2I != F2E); + ++InstL, ++InstR; + } while (InstL != InstLE && InstR != InstRE); - return F1I == F1E && F2I == F2E; + if (InstL != InstLE && InstR == InstRE) + return 1; + if (InstL == InstLE && InstR != InstRE) + return -1; + return 0; } // Test whether the two functions have equivalent behaviour. -bool FunctionComparator::compare() { - // We need to recheck everything, but check the things that weren't included - // in the hash first. +int FunctionComparator::compare() { sn_mapL.clear(); sn_mapR.clear(); - if (F1->getAttributes() != F2->getAttributes()) - return false; + if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes())) + return Res; - if (F1->hasGC() != F2->hasGC()) - return false; + if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC())) + return Res; - if (F1->hasGC() && F1->getGC() != F2->getGC()) - return false; + if (FnL->hasGC()) { + if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC())) + return Res; + } - if (F1->hasSection() != F2->hasSection()) - return false; + if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection())) + return Res; - if (F1->hasSection() && F1->getSection() != F2->getSection()) - return false; + if (FnL->hasSection()) { + if (int Res = cmpStrings(FnL->getSection(), FnR->getSection())) + return Res; + } - if (F1->isVarArg() != F2->isVarArg()) - return false; + if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg())) + return Res; // TODO: if it's internal and only used in direct calls, we could handle this // case too. 
- if (F1->getCallingConv() != F2->getCallingConv()) - return false; + if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) + return Res; - if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType())) - return false; + if (int Res = cmpType(FnL->getFunctionType(), FnR->getFunctionType())) + return Res; - assert(F1->arg_size() == F2->arg_size() && + assert(FnL->arg_size() == FnR->arg_size() && "Identically typed functions have different numbers of args!"); // Visit the arguments so that they get enumerated in the order they're // passed in. - for (Function::const_arg_iterator f1i = F1->arg_begin(), - f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) { - if (!enumerate(f1i, f2i)) + for (Function::const_arg_iterator ArgLI = FnL->arg_begin(), + ArgRI = FnR->arg_begin(), + ArgLE = FnL->arg_end(); + ArgLI != ArgLE; ++ArgLI, ++ArgRI) { + if (cmpValues(ArgLI, ArgRI) != 0) llvm_unreachable("Arguments repeat!"); } @@ -1053,33 +1065,36 @@ bool FunctionComparator::compare() { // linked list is immaterial. Our walk starts at the entry block for both // functions, then takes each block from each terminator in order. As an // artifact, this also means that unreachable blocks are ignored. - SmallVector F1BBs, F2BBs; + SmallVector FnLBBs, FnRBBs; SmallSet VisitedBBs; // in terms of F1. 
- F1BBs.push_back(&F1->getEntryBlock()); - F2BBs.push_back(&F2->getEntryBlock()); + FnLBBs.push_back(&FnL->getEntryBlock()); + FnRBBs.push_back(&FnR->getEntryBlock()); - VisitedBBs.insert(F1BBs[0]); - while (!F1BBs.empty()) { - const BasicBlock *F1BB = F1BBs.pop_back_val(); - const BasicBlock *F2BB = F2BBs.pop_back_val(); + VisitedBBs.insert(FnLBBs[0]); + while (!FnLBBs.empty()) { + const BasicBlock *BBL = FnLBBs.pop_back_val(); + const BasicBlock *BBR = FnRBBs.pop_back_val(); - if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB)) - return false; + if (int Res = cmpValues(BBL, BBR)) + return Res; - const TerminatorInst *F1TI = F1BB->getTerminator(); - const TerminatorInst *F2TI = F2BB->getTerminator(); + if (int Res = compare(BBL, BBR)) + return Res; + + const TerminatorInst *TermL = BBL->getTerminator(); + const TerminatorInst *TermR = BBR->getTerminator(); - assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors()); - for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(F1TI->getSuccessor(i))) + assert(TermL->getNumSuccessors() == TermR->getNumSuccessors()); + for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(TermL->getSuccessor(i))) continue; - F1BBs.push_back(F1TI->getSuccessor(i)); - F2BBs.push_back(F2TI->getSuccessor(i)); + FnLBBs.push_back(TermL->getSuccessor(i)); + FnRBBs.push_back(TermR->getSuccessor(i)); } } - return true; + return 0; } namespace { @@ -1226,8 +1241,8 @@ bool DenseMapInfo::isEqual(const ComparableFunction &LHS, assert(LHS.getDataLayout() == RHS.getDataLayout() && "Comparing functions for different targets"); - return FunctionComparator(LHS.getDataLayout(), LHS.getFunc(), - RHS.getFunc()).compare(); + return FunctionComparator(LHS.getDataLayout(), LHS.getFunc(), RHS.getFunc()) + .compare() == 0; } // Replace direct callers of Old with New. 
From e298b1ce98c4077b7bc4c317c1815e6b729c41a1 Mon Sep 17 00:00:00 2001 From: Stepan Dyatkovskiy Date: Sat, 21 Jun 2014 18:58:11 +0000 Subject: [PATCH 0121/1155] MergeFunctions Pass, introduced sanity check, that checks order relation, introduced among functions set. This patch belongs to patch series that improves MergeFunctions performance time from O(N*N) to O(N*log(N)). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 88 +++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 49dcb39780a0..0c8585f80628 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -60,6 +60,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -73,6 +74,13 @@ STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); +static cl::opt NumFunctionsForSanityCheck( + "mergefunc-sanity", + cl::desc("How many functions in module could be used for " + "MergeFunctions pass sanity check. " + "'0' disables this check. Works only with '-debug' key."), + cl::init(0), cl::Hidden); + /// Returns the type id for a type to be hashed. We turn pointer types into /// integers here because the actual compare logic below considers pointers and /// integers of the same size as equal. @@ -1121,6 +1129,10 @@ class MergeFunctions : public ModulePass { /// analyzed again. std::vector Deferred; + /// Checks the rules of order relation introduced among functions set. + /// Returns true, if sanity check has been passed, and false if failed. 
+ bool doSanityCheck(std::vector &Worklist); + /// Insert a ComparableFunction into the FnSet, or merge it away if it's /// equal to one that's already present. bool insert(ComparableFunction &NewF); @@ -1172,6 +1184,80 @@ ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } +bool MergeFunctions::doSanityCheck(std::vector &Worklist) { + if (const unsigned Max = NumFunctionsForSanityCheck) { + unsigned TripleNumber = 0; + bool Valid = true; + + dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; + + unsigned i = 0; + for (std::vector::iterator I = Worklist.begin(), E = Worklist.end(); + I != E && i < Max; ++I, ++i) { + unsigned j = i; + for (std::vector::iterator J = I; J != E && j < Max; ++J, ++j) { + Function *F1 = cast(*I); + Function *F2 = cast(*J); + int Res1 = FunctionComparator(DL, F1, F2).compare(); + int Res2 = FunctionComparator(DL, F2, F1).compare(); + + // If F1 <= F2, then F2 >= F1, otherwise report failure. + if (Res1 != -Res2) { + dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber + << "\n"; + F1->dump(); + F2->dump(); + Valid = false; + } + + if (Res1 == 0) + continue; + + unsigned k = j; + for (std::vector::iterator K = J; K != E && k < Max; + ++k, ++K, ++TripleNumber) { + if (K == J) + continue; + + Function *F3 = cast(*K); + int Res3 = FunctionComparator(DL, F1, F3).compare(); + int Res4 = FunctionComparator(DL, F2, F3).compare(); + + bool Transitive = true; + + // F1 > F2, F2 > F3 => F1 > F3 + if (Res1 != 0 && Res1 == Res4) { + Transitive = Res3 == Res1; + } else + // F1 > F3, F3 > F2 => F1 > F2 + if (Res3 != 0 && Res3 == -Res4) { + Transitive = Res3 == Res1; + } else + // F2 > F3, F3 > F1 => F2 > F1 + if (Res4 != 0 && -Res3 == Res4) { + Transitive = Res4 == -Res1; + } + + if (!Transitive) { + dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: " + << TripleNumber << "\n"; + dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", " + << Res4 << "\n"; + F1->dump(); + 
F2->dump(); + F3->dump(); + Valid = false; + } + } + } + } + + dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n"; + return Valid; + } + return true; +} + bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; DataLayoutPass *DLP = getAnalysisIfAvailable(); @@ -1187,6 +1273,8 @@ bool MergeFunctions::runOnModule(Module &M) { std::vector Worklist; Deferred.swap(Worklist); + DEBUG(doSanityCheck(Worklist)); + DEBUG(dbgs() << "size of module: " << M.size() << '\n'); DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); From 768231880a1013e0577c94bfa7fdc8b87f901b20 Mon Sep 17 00:00:00 2001 From: Stepan Dyatkovskiy Date: Sat, 21 Jun 2014 19:07:51 +0000 Subject: [PATCH 0122/1155] MergeFunctions, doSanityCheck: fixed body comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211443 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 0c8585f80628..7130b5431167 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -1225,16 +1225,14 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { bool Transitive = true; - // F1 > F2, F2 > F3 => F1 > F3 if (Res1 != 0 && Res1 == Res4) { + // F1 > F2, F2 > F3 => F1 > F3 Transitive = Res3 == Res1; - } else - // F1 > F3, F3 > F2 => F1 > F2 - if (Res3 != 0 && Res3 == -Res4) { + } else if (Res3 != 0 && Res3 == -Res4) { + // F1 > F3, F3 > F2 => F1 > F2 Transitive = Res3 == Res1; - } else - // F2 > F3, F3 > F1 => F2 > F1 - if (Res4 != 0 && -Res3 == Res4) { + } else if (Res4 != 0 && -Res3 == Res4) { + // F2 > F3, F3 > F1 => F2 > F1 Transitive = Res4 == -Res1; } From 83ac8b35e91d69222095b6952f3302ba3f43426b Mon Sep 17 00:00:00 2001 From: Stepan Dyatkovskiy Date: Sat, 21 Jun 2014 20:13:24 +0000 Subject: [PATCH 0123/1155] MergeFunctions Pass, removed 
unused methods from old implementation. Patch removed next old FunctionComparator methods: * enumerate * isEquivalentOperation * isEquivalentGEP * isEquivalentType This patch belongs to patch series that improves MergeFunctions performance time from O(N*N) to O(N*log(N)). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211444 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 7130b5431167..8ec8ff29af26 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -308,10 +308,6 @@ class FunctionComparator { /// see comments for sn_mapL and sn_mapR. int cmpValues(const Value *L, const Value *R); - bool enumerate(const Value *V1, const Value *V2) { - return cmpValues(V1, V2) == 0; - } - /// Compare two Instructions for equivalence, similar to /// Instruction::isSameOperationAs but with modifications to the type /// comparison. @@ -338,11 +334,6 @@ class FunctionComparator { /// strings for particular instruction, and could change sometimes. int cmpOperation(const Instruction *L, const Instruction *R) const; - bool isEquivalentOperation(const Instruction *I1, - const Instruction *I2) const { - return cmpOperation(I1, I2) == 0; - } - /// Compare two GEPs for equivalent pointer arithmetic. /// Parts to be compared for each comparison stage, /// most significant stage first: @@ -357,14 +348,6 @@ class FunctionComparator { return cmpGEP(cast(GEPL), cast(GEPR)); } - bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) { - return cmpGEP(GEP1, GEP2) == 0; - } - bool isEquivalentGEP(const GetElementPtrInst *GEP1, - const GetElementPtrInst *GEP2) { - return isEquivalentGEP(cast(GEP1), cast(GEP2)); - } - /// cmpType - compares two types, /// defines total ordering among the types set. 
/// @@ -407,10 +390,6 @@ class FunctionComparator { /// 6. For all other cases put llvm_unreachable. int cmpType(Type *TyL, Type *TyR) const; - bool isEquivalentType(Type *Ty1, Type *Ty2) const { - return cmpType(Ty1, Ty2) == 0; - } - int cmpNumbers(uint64_t L, uint64_t R) const; int cmpAPInt(const APInt &L, const APInt &R) const; From 427afb91d38252b1abf0afaa08099f1f8cda3b66 Mon Sep 17 00:00:00 2001 From: Stepan Dyatkovskiy Date: Sat, 21 Jun 2014 20:54:36 +0000 Subject: [PATCH 0124/1155] MergeFunctions Pass, FnSet has been replaced with FnTree. Patch activates new implementation. So from now, merging process should take time O(N*log(N)). Where N size of module (we are free to measure it in functions or in instructions). Internally FnTree represents binary tree. So every lookup operation takes O(log(N)) time. It is still not the last patch in series, we also have to clean-up pass from old code, and update pass comments. This patch belongs to patch series that improves MergeFunctions performance time from O(N*N) to O(N*log(N)). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211445 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 86 +++++++++++++++------------ 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 8ec8ff29af26..5cf6c5ae8326 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -438,6 +438,18 @@ class FunctionComparator { DenseMap sn_mapL, sn_mapR; }; +class FunctionPtr { + AssertingVH F; + const DataLayout *DL; + +public: + FunctionPtr(Function *F, const DataLayout *DL) : F(F), DL(DL) {} + Function *getFunc() const { return F; } + void release() { F = 0; } + bool operator<(const FunctionPtr &RHS) const { + return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1; + } +}; } int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { @@ -1102,7 +1114,7 @@ class MergeFunctions : public ModulePass { bool runOnModule(Module &M) override; private: - typedef DenseSet FnSetType; + typedef std::set FnTreeType; /// A work queue of functions that may have been modified and should be /// analyzed again. @@ -1112,15 +1124,15 @@ class MergeFunctions : public ModulePass { /// Returns true, if sanity check has been passed, and false if failed. bool doSanityCheck(std::vector &Worklist); - /// Insert a ComparableFunction into the FnSet, or merge it away if it's + /// Insert a ComparableFunction into the FnTree, or merge it away if it's /// equal to one that's already present. - bool insert(ComparableFunction &NewF); + bool insert(Function *NewFunction); - /// Remove a Function from the FnSet and queue it up for a second sweep of + /// Remove a Function from the FnTree and queue it up for a second sweep of /// analysis. 
void remove(Function *F); - /// Find the functions that use this Value and remove them from FnSet and + /// Find the functions that use this Value and remove them from FnTree and /// queue the functions. void removeUsers(Value *V); @@ -1145,7 +1157,7 @@ class MergeFunctions : public ModulePass { /// The set of all distinct functions. Use the insert() and remove() methods /// to modify it. - FnSetType FnSet; + FnTreeType FnTree; /// DataLayout for more accurate GEP comparisons. May be NULL. const DataLayout *DL; @@ -1244,7 +1256,6 @@ bool MergeFunctions::runOnModule(Module &M) { if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) Deferred.push_back(WeakVH(I)); } - FnSet.resize(Deferred.size()); do { std::vector Worklist; @@ -1263,8 +1274,7 @@ bool MergeFunctions::runOnModule(Module &M) { Function *F = cast(*I); if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && !F->mayBeOverridden()) { - ComparableFunction CF = ComparableFunction(F, DL); - Changed |= insert(CF); + Changed |= insert(F); } } @@ -1278,14 +1288,13 @@ bool MergeFunctions::runOnModule(Module &M) { Function *F = cast(*I); if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && F->mayBeOverridden()) { - ComparableFunction CF = ComparableFunction(F, DL); - Changed |= insert(CF); + Changed |= insert(F); } } - DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n'); + DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n'); } while (!Deferred.empty()); - FnSet.clear(); + FnTree.clear(); return Changed; } @@ -1464,54 +1473,57 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { ++NumFunctionsMerged; } -// Insert a ComparableFunction into the FnSet, or merge it away if equal to one +// Insert a ComparableFunction into the FnTree, or merge it away if equal to one // that was already inserted. 
-bool MergeFunctions::insert(ComparableFunction &NewF) { - std::pair Result = FnSet.insert(NewF); +bool MergeFunctions::insert(Function *NewFunction) { + std::pair Result = + FnTree.insert(FunctionPtr(NewFunction, DL)); + if (Result.second) { - DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n'); + DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); return false; } - const ComparableFunction &OldF = *Result.first; + const FunctionPtr &OldF = *Result.first; // Don't merge tiny functions, since it can just end up making the function // larger. // FIXME: Should still merge them if they are unnamed_addr and produce an // alias. - if (NewF.getFunc()->size() == 1) { - if (NewF.getFunc()->front().size() <= 2) { - DEBUG(dbgs() << NewF.getFunc()->getName() - << " is to small to bother merging\n"); + if (NewFunction->size() == 1) { + if (NewFunction->front().size() <= 2) { + DEBUG(dbgs() << NewFunction->getName() + << " is to small to bother merging\n"); return false; } } // Never thunk a strong function to a weak function. - assert(!OldF.getFunc()->mayBeOverridden() || - NewF.getFunc()->mayBeOverridden()); + assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden()); - DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " - << NewF.getFunc()->getName() << '\n'); + DEBUG(dbgs() << " " << OldF.getFunc()->getName() + << " == " << NewFunction->getName() << '\n'); - Function *DeleteF = NewF.getFunc(); - NewF.release(); + Function *DeleteF = NewFunction; mergeTwoFunctions(OldF.getFunc(), DeleteF); return true; } -// Remove a function from FnSet. If it was already in FnSet, add it to Deferred -// so that we'll look at it in the next round. +// Remove a function from FnTree. If it was already in FnTree, add +// it to Deferred so that we'll look at it in the next round. 
void MergeFunctions::remove(Function *F) { // We need to make sure we remove F, not a function "equal" to F per the // function equality comparator. - // - // The special "lookup only" ComparableFunction bypasses the expensive - // function comparison in favour of a pointer comparison on the underlying - // Function*'s. - ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly); - if (FnSet.erase(CF)) { - DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n"); + FnTreeType::iterator found = FnTree.find(FunctionPtr(F, DL)); + size_t Erased = 0; + if (found != FnTree.end() && found->getFunc() == F) { + Erased = 1; + FnTree.erase(found); + } + + if (Erased) { + DEBUG(dbgs() << "Removed " << F->getName() + << " from set and deferred it.\n"); Deferred.push_back(F); } } From 09f104fc323ff9b8cfcecd1f0ec7b470b38c69a1 Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Sun, 22 Jun 2014 00:33:44 +0000 Subject: [PATCH 0125/1155] Report error for non-zero data in .bss User may initialize a var with non-zero value and specify .bss section. E.g. : int a __attribute__((section(".bss"))) = 2; This patch converts an assertion to error report for better user experience. 
Differential Revision: http://reviews.llvm.org/D4199 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211455 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCAssembler.cpp | 10 ++++++++-- test/MC/ELF/ARM/bss-non-zero-value.s | 9 +++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 test/MC/ELF/ARM/bss-non-zero-value.s diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 886a5f554531..4f876c8ce7de 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCSectionELF.h" #include using namespace llvm; @@ -782,8 +783,13 @@ void MCAssembler::writeSectionData(const MCSectionData *SD, assert(DF.fixup_begin() == DF.fixup_end() && "Cannot have fixups in virtual section!"); for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i) - assert(DF.getContents()[i] == 0 && - "Invalid data value for virtual section!"); + if (DF.getContents()[i]) { + if (auto *ELFSec = dyn_cast(&SD->getSection())) + report_fatal_error("non-zero initializer found in section '" + + ELFSec->getSectionName() + "'"); + else + report_fatal_error("non-zero initializer found in virtual section"); + } break; } case MCFragment::FT_Align: diff --git a/test/MC/ELF/ARM/bss-non-zero-value.s b/test/MC/ELF/ARM/bss-non-zero-value.s new file mode 100644 index 000000000000..999b8b019c9c --- /dev/null +++ b/test/MC/ELF/ARM/bss-non-zero-value.s @@ -0,0 +1,9 @@ +// RUN: not llvm-mc -filetype=obj -triple arm-linux-gnu %s -o %t 2>%t.out +// RUN: FileCheck --input-file=%t.out %s +// CHECK: non-zero initializer found in section '.bss' + .bss + .globl a + .align 2 +a: + .long 1 + .size a, 4 From aa5b571d45b6798c561bd48f28b9606a9033592f Mon Sep 17 00:00:00 2001 From: Stepan Dyatkovskiy Date: Sun, 22 Jun 2014 00:57:09 +0000 Subject: [PATCH 0126/1155] MergeFunctions Pass, updated header comments. 
Added short description for new comparison algorithm, that introduces total ordering among functions set. This patch belongs to patch series that improves MergeFunctions performance time from O(N*N) to O(N*log(N)). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211456 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 56 ++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 5cf6c5ae8326..ee6b11b45a79 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -9,13 +9,24 @@ // // This pass looks for equivalent functions that are mergable and folds them. // -// A hash is computed from the function, based on its type and number of -// basic blocks. +// Order relation is defined on set of functions. It was made through +// special function comparison procedure that returns +// 0 when functions are equal, +// -1 when Left function is less than right function, and +// 1 for opposite case. We need total-ordering, so we need to maintain +// four properties on the functions set: +// a <= a (reflexivity) +// if a <= b and b <= a then a = b (antisymmetry) +// if a <= b and b <= c then a <= c (transitivity). +// for all a and b: a <= b or b <= a (totality). // -// Once all hashes are computed, we perform an expensive equality comparison -// on each function pair. This takes n^2/2 comparisons per bucket, so it's -// important that the hash function be high quality. The equality comparison -// iterates through each instruction in each basic block. +// Comparison iterates through each instruction in each basic block. +// Functions are kept on binary tree. For each new function F we perform +// lookup in binary tree. +// In practice it works the following way: +// -- We define Function* container class with custom "operator<" (FunctionPtr). 
+// -- "FunctionPtr" instances are stored in std::set collection, so every +// std::set::insert operation will give you result in log(N) time. // // When a match is found the functions are folded. If both functions are // overridable, we move the functionality into a new internal function and @@ -31,9 +42,6 @@ // the object they belong to. However, as long as it's only used for a lookup // and call, this is irrelevant, and we'd like to fold such functions. // -// * switch from n^2 pair-wise comparisons to an n-way comparison for each -// bucket. -// // * be smarter about bitcasts. // // In order to fold functions, we will sometimes add either bitcast instructions @@ -41,6 +49,36 @@ // analysis since the two functions differ where one has a bitcast and the // other doesn't. We should learn to look through bitcasts. // +// * Compare complex types with pointer types inside. +// * Compare cross-reference cases. +// * Compare complex expressions. +// +// All the three issues above could be described as ability to prove that +// fA == fB == fC == fE == fF == fG in example below: +// +// void fA() { +// fB(); +// } +// void fB() { +// fA(); +// } +// +// void fE() { +// fF(); +// } +// void fF() { +// fG(); +// } +// void fG() { +// fE(); +// } +// +// Simplest cross-reference case (fA <--> fB) was implemented in previous +// versions of MergeFunctions, though it presented only in two function pairs +// in test-suite (that counts >50k functions) +// Though possibility to detect complex cross-referencing (e.g.: A->B->C->D->A) +// could cover much more cases. +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" From c815039ca2a5b18d3cbc5b35767287f7ec67e7a7 Mon Sep 17 00:00:00 2001 From: Stepan Dyatkovskiy Date: Sun, 22 Jun 2014 01:53:30 +0000 Subject: [PATCH 0127/1155] MergeFunctions Pass, removed DenseMap helpers. Patch removes rest part of code related to old implementation. 
This patch belongs to patch series that improves MergeFunctions performance time from O(N*N) to O(N*log(N)). This one was the final patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211457 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 104 -------------------------- 1 file changed, 104 deletions(-) diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index ee6b11b45a79..559ef0ba2285 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -119,90 +119,6 @@ static cl::opt NumFunctionsForSanityCheck( "'0' disables this check. Works only with '-debug' key."), cl::init(0), cl::Hidden); -/// Returns the type id for a type to be hashed. We turn pointer types into -/// integers here because the actual compare logic below considers pointers and -/// integers of the same size as equal. -static Type::TypeID getTypeIDForHash(Type *Ty) { - if (Ty->isPointerTy()) - return Type::IntegerTyID; - return Ty->getTypeID(); -} - -/// Creates a hash-code for the function which is the same for any two -/// functions that will compare equal, without looking at the instructions -/// inside the function. -static unsigned profileFunction(const Function *F) { - FunctionType *FTy = F->getFunctionType(); - - FoldingSetNodeID ID; - ID.AddInteger(F->size()); - ID.AddInteger(F->getCallingConv()); - ID.AddBoolean(F->hasGC()); - ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(getTypeIDForHash(FTy->getReturnType())); - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); - return ID.ComputeHash(); -} - -namespace { - -/// ComparableFunction - A struct that pairs together functions with a -/// DataLayout so that we can keep them together as elements in the DenseSet. 
-class ComparableFunction { -public: - static const ComparableFunction EmptyKey; - static const ComparableFunction TombstoneKey; - static DataLayout * const LookupOnly; - - ComparableFunction(Function *Func, const DataLayout *DL) - : Func(Func), Hash(profileFunction(Func)), DL(DL) {} - - Function *getFunc() const { return Func; } - unsigned getHash() const { return Hash; } - const DataLayout *getDataLayout() const { return DL; } - - // Drops AssertingVH reference to the function. Outside of debug mode, this - // does nothing. - void release() { - assert(Func && - "Attempted to release function twice, or release empty/tombstone!"); - Func = nullptr; - } - -private: - explicit ComparableFunction(unsigned Hash) - : Func(nullptr), Hash(Hash), DL(nullptr) {} - - AssertingVH Func; - unsigned Hash; - const DataLayout *DL; -}; - -const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); -const ComparableFunction ComparableFunction::TombstoneKey = - ComparableFunction(1); -DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1); - -} - -namespace llvm { - template <> - struct DenseMapInfo { - static ComparableFunction getEmptyKey() { - return ComparableFunction::EmptyKey; - } - static ComparableFunction getTombstoneKey() { - return ComparableFunction::TombstoneKey; - } - static unsigned getHashValue(const ComparableFunction &CF) { - return CF.getHash(); - } - static bool isEqual(const ComparableFunction &LHS, - const ComparableFunction &RHS); - }; -} - namespace { /// FunctionComparator - Compares two functions to determine whether or not @@ -1337,26 +1253,6 @@ bool MergeFunctions::runOnModule(Module &M) { return Changed; } -bool DenseMapInfo::isEqual(const ComparableFunction &LHS, - const ComparableFunction &RHS) { - if (LHS.getFunc() == RHS.getFunc() && - LHS.getHash() == RHS.getHash()) - return true; - if (!LHS.getFunc() || !RHS.getFunc()) - return false; - - // One of these is a special "underlying pointer comparison only" object. 
- if (LHS.getDataLayout() == ComparableFunction::LookupOnly || - RHS.getDataLayout() == ComparableFunction::LookupOnly) - return false; - - assert(LHS.getDataLayout() == RHS.getDataLayout() && - "Comparing functions for different targets"); - - return FunctionComparator(LHS.getDataLayout(), LHS.getFunc(), RHS.getFunc()) - .compare() == 0; -} - // Replace direct callers of Old with New. void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); From 78f173a22d94ec3fbcbd9fe0adf1f2093fb9a95e Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sun, 22 Jun 2014 03:38:59 +0000 Subject: [PATCH 0128/1155] LoopVectorizer: Fix a dominance issue The induction variables start value needs to be defined before we branch (overflow check) to the scalar preheader where we used it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211460 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 25 +++++++++++----- test/Transforms/LoopVectorize/induction.ll | 34 ++++++++++++++++++++++ 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 15d4c1c79d2b..79a6ecd15b72 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1987,10 +1987,6 @@ void InnerLoopVectorizer::createEmptyLoop() { Constant::getAllOnesValue(BackedgeCount->getType()), "backedge.overflow", BypassBlock->getTerminator()); - // Count holds the overall loop count (N). - Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - BypassBlock->getTerminator()); - // The loop index does not have to start at Zero. Find the original start // value from the induction PHI node. If we don't have an induction variable // then we know that it starts at zero. 
@@ -2000,6 +1996,18 @@ void InnerLoopVectorizer::createEmptyLoop() { IdxTy): ConstantInt::get(IdxTy, 0); + // We need an instruction to anchor the overflow check on. StartIdx needs to + // be defined before the overflow check branch. Because the scalar preheader + // is going to merge the start index and so the overflow branch block needs to + // contain a definition of the start index. + Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd( + StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor", + BypassBlock->getTerminator()); + + // Count holds the overall loop count (N). + Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), + BypassBlock->getTerminator()); + LoopBypassBlocks.push_back(BypassBlock); // Split the single block loop into the two loop structure described above. @@ -2068,17 +2076,18 @@ void InnerLoopVectorizer::createEmptyLoop() { // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. - Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, - "cmp.zero"); + Value *Cmp = + BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero"); BasicBlock *LastBypassBlock = BypassBlock; // Generate code to check that the loops trip count that we computed by adding // one to the backedge-taken count will not overflow. 
{ - auto PastOverflowCheck = std::next(BasicBlock::iterator(CheckBCOverflow)); + auto PastOverflowCheck = + std::next(BasicBlock::iterator(OverflowCheckAnchor)); BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked"); + LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked"); if (ParentLoop) ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase()); LoopBypassBlocks.push_back(CheckBlock); diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll index 3102ec59bd41..7dabcb2ba04f 100644 --- a/test/Transforms/LoopVectorize/induction.ll +++ b/test/Transforms/LoopVectorize/induction.ll @@ -135,3 +135,37 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { ; - {{ size={ItemSize}, buckets={NumBuckets} }} + {{ size={NumItems}, buckets={NumBuckets} }} - ItemSize + NumItems NumBuckets NumBuckets From 2bd8e0cde3b451e14104e982b5fb4a003c83a9ae Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Wed, 9 Jul 2014 06:27:05 +0000 Subject: [PATCH 0688/1155] Prospective -fsanitize=memory build fix following r212586 This -f group flag appears to influence linker flags, breaking the usual rules and causing CMake's link invocation to fail during feature detection due to missing link dependencies (msan_*). Let's forcibly add it for now to get things the way they were before feature detection started working. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212590 91177308-0d34-0410-b5e6-96231b3b80d8 --- cmake/modules/HandleLLVMOptions.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 771ea8520670..b8577f70666d 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -343,12 +343,12 @@ if(LLVM_USE_SANITIZER) if (LLVM_ON_UNIX) if (LLVM_USE_SANITIZER STREQUAL "Address") append_common_sanitizer_flags() - add_flag_or_print_warning("-fsanitize=address" FSANITIZE_ADDRESS) + append("-fsanitize=address" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) elseif (LLVM_USE_SANITIZER MATCHES "Memory(WithOrigins)?") append_common_sanitizer_flags() - add_flag_or_print_warning("-fsanitize=memory" FSANITIZE_MEMORY) + append("-fsanitize=memory" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) if(LLVM_USE_SANITIZER STREQUAL "MemoryWithOrigins") - add_flag_or_print_warning("-fsanitize-memory-track-origins" FSANITIZE_MEMORY_TRACK_ORIGINS) + append("-fsanitize-memory-track-origins" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endif() else() message(WARNING "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}") From 7b39c067ac3c1ff08e25ef7b48444440c4136bd2 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Wed, 9 Jul 2014 08:30:15 +0000 Subject: [PATCH 0689/1155] SourceMgr: consistently use 'unsigned' for the memory buffer ID type git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212595 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/SourceMgr.h | 8 ++++---- lib/Support/SourceMgr.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h index 104b6962c761..4717553bd0de 100644 --- a/include/llvm/Support/SourceMgr.h +++ b/include/llvm/Support/SourceMgr.h @@ -99,7 +99,7 @@ class SourceMgr { return Buffers[i - 1].Buffer; } - size_t getNumBuffers() const { + unsigned 
getNumBuffers() const { return Buffers.size(); } @@ -115,7 +115,7 @@ class SourceMgr { /// Add a new source buffer to this source manager. This takes ownership of /// the memory buffer. - size_t AddNewSourceBuffer(MemoryBuffer *F, SMLoc IncludeLoc) { + unsigned AddNewSourceBuffer(MemoryBuffer *F, SMLoc IncludeLoc) { SrcBuffer NB; NB.Buffer = F; NB.IncludeLoc = IncludeLoc; @@ -129,8 +129,8 @@ class SourceMgr { /// If no file is found, this returns 0, otherwise it returns the buffer ID /// of the stacked file. The full path to the included file can be found in /// \p IncludedFile. - size_t AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc, - std::string &IncludedFile); + unsigned AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc, + std::string &IncludedFile); /// Return the ID of the buffer containing the specified location. /// diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 595c6eec1107..003cb56e6cb5 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -49,9 +49,9 @@ SourceMgr::~SourceMgr() { } } -size_t SourceMgr::AddIncludeFile(const std::string &Filename, - SMLoc IncludeLoc, - std::string &IncludedFile) { +unsigned SourceMgr::AddIncludeFile(const std::string &Filename, + SMLoc IncludeLoc, + std::string &IncludedFile) { IncludedFile = Filename; ErrorOr> NewBufOrErr = MemoryBuffer::getFile(IncludedFile.c_str()); From 0b3c7cbf46e6670b9ed4d04159faf53c29681203 Mon Sep 17 00:00:00 2001 From: Timur Iskhodzhanov Date: Wed, 9 Jul 2014 08:35:33 +0000 Subject: [PATCH 0690/1155] [ASan/Win] Don't instrument COMDAT globals. Properly fixes PR20244. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212596 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8f9355cb8311..5e5ddc127eee 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -923,11 +923,14 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { if (!G->hasInitializer()) return false; if (GlobalWasGeneratedByAsan(G)) return false; // Our own global. // Touch only those globals that will not be defined in other modules. - // Don't handle ODR type linkages since other modules may be built w/o asan. + // Don't handle ODR linkage types and COMDATs since other modules may be built + // without ASan. if (G->getLinkage() != GlobalVariable::ExternalLinkage && G->getLinkage() != GlobalVariable::PrivateLinkage && G->getLinkage() != GlobalVariable::InternalLinkage) return false; + if (G->hasComdat()) + return false; // Two problems with thread-locals: // - The address of the main thread's copy can't be computed at link-time. // - Need to poison all copies, not just the main thread's one. @@ -946,13 +949,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { return false; } - // Don't instrument private COMDAT globals on Windows until PR20244 (linkage - // of vftables with RTTI) is properly fixed. - llvm::Triple TargetTriple(G->getParent()->getTargetTriple()); - if (G->hasComdat() && G->getLinkage() == GlobalVariable::PrivateLinkage && - TargetTriple.isWindowsMSVCEnvironment()) - return false; - if (G->hasSection()) { StringRef Section(G->getSection()); // Ignore the globals from the __OBJC section. 
The ObjC runtime assumes From 98eac0a2445621d985e165d0774459d2361008aa Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 9 Jul 2014 10:06:58 +0000 Subject: [PATCH 0691/1155] [x86] Re-apply a variant of the x86 side of r212324 now that the rest has settled without incident, removing the x86-specific and overly strict 'isVectorSplat' routine in favor of generic and more powerful splat detection. The primary motivation and result of this is that the x86 backend can now see through splats which contain undef elements. This is essential if we are using a widening form of legalization and I've updated a test case to also run in that mode as before this change the generated code for the test case was completely scalarized. This version of the patch much more carefully handles the undef lanes. - We aren't overly conservative about them in the shift lowering (where we will never use the splat itself). - One place where the splat would have been re-used by the existing code now explicitly constructs a new constant splat that will be safe. - The broadcast lowering is much more reasonable with undefs by doing a correct check of whether the splat is the only user of a loaded value, checking that the splat actually crosses multiple lanes before using a broadcast, and handling broadcasts of non-constant splats. As a consequence of the last bullet, the weird usage of vpshufd instead of vbroadcast is gone, and we actually can lower an AVX splat with vbroadcastss where before we emitted a really strange pattern of a vector load and a manual splat across the vector. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212602 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 144 ++++++++++++++--------------- test/CodeGen/X86/avx-splat.ll | 9 +- test/CodeGen/X86/widen_cast-4.ll | 25 ++++- 3 files changed, 96 insertions(+), 82 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 306a659c9831..daf141bfcec1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4858,19 +4858,6 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, return true; } -/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are -/// all the same. -static bool isSplatVector(SDNode *N) { - if (N->getOpcode() != ISD::BUILD_VECTOR) - return false; - - SDValue SplatValue = N->getOperand(0); - for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i) != SplatValue) - return false; - return true; -} - /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved /// to an zero vector. /// FIXME: move to dag combiner / method on ShuffleVectorSDNode @@ -5779,18 +5766,22 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, return SDValue(); case ISD::BUILD_VECTOR: { - // The BUILD_VECTOR node must be a splat. - if (!isSplatVector(Op.getNode())) + auto *BVOp = cast(Op.getNode()); + BitVector UndefElements; + SDValue Splat = BVOp->getSplatValue(&UndefElements); + + // We need a splat of a single value to use broadcast, and it doesn't + // make any sense if the value is only in one element of the vector. + if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1) return SDValue(); - Ld = Op.getOperand(0); + Ld = Splat; ConstSplatVal = (Ld.getOpcode() == ISD::Constant || - Ld.getOpcode() == ISD::ConstantFP); + Ld.getOpcode() == ISD::ConstantFP); - // The suspected load node has several users. 
Make sure that all - // of its users are from the BUILD_VECTOR node. - // Constants may have multiple users. - if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) + // Make sure that all of the users of a non-constant load are from the + // BUILD_VECTOR node. + if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode())) return SDValue(); break; } @@ -9375,8 +9366,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. - V1IsSplat = isSplatVector(V1.getNode()); - V2IsSplat = isSplatVector(V2.getNode()); + BitVector UndefElements; + if (auto *BVOp = dyn_cast(V1.getNode())) + if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none()) + V1IsSplat = true; + if (auto *BVOp = dyn_cast(V2.getNode())) + if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none()) + V2IsSplat = true; // Canonicalize the splat or undef, if present, to be on the RHS. if (!V2IsUndef && V1IsSplat && !V2IsSplat) { @@ -15171,10 +15167,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, SDValue Amt = Op.getOperand(1); // Optimize shl/srl/sra with constant shift amount. - if (isSplatVector(Amt.getNode())) { - SDValue SclrAmt = Amt->getOperand(0); - if (ConstantSDNode *C = dyn_cast(SclrAmt)) { - uint64_t ShiftAmt = C->getZExtValue(); + if (auto *BVAmt = dyn_cast(Amt)) { + if (auto *ShiftConst = BVAmt->getConstantSplatNode()) { + uint64_t ShiftAmt = ShiftConst->getZExtValue(); if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || (Subtarget->hasInt256() && @@ -19423,28 +19418,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS)) return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS); - // If the RHS is a constant we have to reverse the const canonicalization. - // x > C-1 ? 
x+-C : 0 --> subus x, C - if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && - isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) { - APInt A = cast(OpRHS.getOperand(0))->getAPIntValue(); - if (CondRHS.getConstantOperandVal(0) == -A-1) - return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, - DAG.getConstant(-A, VT)); - } - - // Another special case: If C was a sign bit, the sub has been - // canonicalized into a xor. - // FIXME: Would it be better to use computeKnownBits to determine whether - // it's safe to decanonicalize the xor? - // x s< 0 ? x^C : 0 --> subus x, C - if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && - ISD::isBuildVectorAllZeros(CondRHS.getNode()) && - isSplatVector(OpRHS.getNode())) { - APInt A = cast(OpRHS.getOperand(0))->getAPIntValue(); - if (A.isSignBit()) - return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS); - } + if (auto *OpRHSBV = dyn_cast(OpRHS)) + if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) { + if (auto *CondRHSBV = dyn_cast(CondRHS)) + if (auto *CondRHSConst = CondRHSBV->getConstantSplatNode()) + // If the RHS is a constant we have to reverse the const + // canonicalization. + // x > C-1 ? x+-C : 0 --> subus x, C + if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && + CondRHSConst->getAPIntValue() == + (-OpRHSConst->getAPIntValue() - 1)) + return DAG.getNode( + X86ISD::SUBUS, DL, VT, OpLHS, + DAG.getConstant(-OpRHSConst->getAPIntValue(), VT)); + + // Another special case: If C was a sign bit, the sub has been + // canonicalized into a xor. + // FIXME: Would it be better to use computeKnownBits to determine + // whether it's safe to decanonicalize the xor? + // x s< 0 ? x^C : 0 --> subus x, C + if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && + ISD::isBuildVectorAllZeros(CondRHS.getNode()) && + OpRHSConst->getAPIntValue().isSignBit()) + // Note that we have to rebuild the RHS constant here to ensure we + // don't rely on particular values of undef lanes. 
+ return DAG.getNode( + X86ISD::SUBUS, DL, VT, OpLHS, + DAG.getConstant(OpRHSConst->getAPIntValue(), VT)); + } } } @@ -20152,16 +20153,15 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { // vector operations in many cases. Also, on sandybridge ADD is faster than // shl. // (shl V, 1) -> add V,V - if (isSplatVector(N1.getNode())) { - assert(N0.getValueType().isVector() && "Invalid vector shift type"); - ConstantSDNode *N1C = dyn_cast(N1->getOperand(0)); - // We shift all of the values by one. In many cases we do not have - // hardware support for this operation. This is better expressed as an ADD - // of two values. - if (N1C && (1 == N1C->getZExtValue())) { - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); + if (auto *N1BV = dyn_cast(N1)) + if (auto *N1SplatC = N1BV->getConstantSplatNode()) { + assert(N0.getValueType().isVector() && "Invalid vector shift type"); + // We shift all of the values by one. In many cases we do not have + // hardware support for this operation. This is better expressed as an ADD + // of two values. 
+ if (N1SplatC->getZExtValue() == 1) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); } - } return SDValue(); } @@ -20180,10 +20180,9 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, SDValue Amt = N->getOperand(1); SDLoc DL(N); - if (isSplatVector(Amt.getNode())) { - SDValue SclrAmt = Amt->getOperand(0); - if (ConstantSDNode *C = dyn_cast(SclrAmt)) { - APInt ShiftAmt = C->getAPIntValue(); + if (auto *AmtBV = dyn_cast(Amt)) + if (auto *AmtSplat = AmtBV->getConstantSplatNode()) { + APInt ShiftAmt = AmtSplat->getAPIntValue(); unsigned MaxAmount = VT.getVectorElementType().getSizeInBits(); // SSE2/AVX2 logical shifts always return a vector of 0s @@ -20193,7 +20192,6 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, if (ShiftAmt.trunc(8).uge(MaxAmount)) return getZeroVector(VT, Subtarget, DAG, DL); } - } return SDValue(); } @@ -20387,9 +20385,10 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // The right side has to be a 'trunc' or a constant vector. bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE; - bool RHSConst = (isSplatVector(N1.getNode()) && - isa(N1->getOperand(0))); - if (!RHSTrunc && !RHSConst) + ConstantSDNode *RHSConstSplat; + if (auto *RHSBV = dyn_cast(N1)) + RHSConstSplat = RHSBV->getConstantSplatNode(); + if (!RHSTrunc && !RHSConstSplat) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -20399,9 +20398,9 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // Set N0 and N1 to hold the inputs to the new wide operation. 
N0 = N0->getOperand(0); - if (RHSConst) { + if (RHSConstSplat) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(), - N1->getOperand(0)); + SDValue(RHSConstSplat, 0)); SmallVector C(WideVT.getVectorNumElements(), N1); N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C); } else if (RHSTrunc) { @@ -20547,12 +20546,9 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); unsigned SraAmt = ~0; if (Mask.getOpcode() == ISD::SRA) { - SDValue Amt = Mask.getOperand(1); - if (isSplatVector(Amt.getNode())) { - SDValue SclrAmt = Amt->getOperand(0); - if (ConstantSDNode *C = dyn_cast(SclrAmt)) - SraAmt = C->getZExtValue(); - } + if (auto *AmtBV = dyn_cast(Mask.getOperand(1))) + if (auto *AmtConst = AmtBV->getConstantSplatNode()) + SraAmt = AmtConst->getZExtValue(); } else if (Mask.getOpcode() == X86ISD::VSRAI) { SDValue SraC = Mask.getOperand(1); SraAmt = cast(SraC)->getZExtValue(); diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll index 5d0781531f4d..b1b2f8b97a73 100644 --- a/test/CodeGen/X86/avx-splat.ll +++ b/test/CodeGen/X86/avx-splat.ll @@ -43,13 +43,10 @@ entry: ret <4 x double> %vecinit6.i } -; Test this simple opt: +; Test this turns into a broadcast: ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> -; To: -; shuffle (vload ptr)), undef, <1, 1, 1, 1> -; CHECK: vmovdqa -; CHECK-NEXT: vpshufd $-1 -; CHECK-NEXT: vinsertf128 $1 +; +; CHECK: vbroadcastss define <8 x float> @funcE() nounwind { allocas: %udx495 = alloca [18 x [18 x float]], align 32 diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll index 1bc06a77cbf7..19b84f19a4ff 100644 --- a/test/CodeGen/X86/widen_cast-4.ll +++ b/test/CodeGen/X86/widen_cast-4.ll @@ -1,8 +1,9 @@ ; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s -; CHECK: psraw -; CHECK: psraw +; RUN: llc < %s -march=x86 -mattr=+sse4.2 -x86-experimental-vector-widening-legalization | FileCheck 
%s --check-prefix=CHECK-WIDE define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { +; CHECK-LABEL: update: +; CHECK-WIDE-LABEL: update: entry: %dst_i.addr = alloca i64* ; [#uses=2] %src_i.addr = alloca i64* ; [#uses=2] @@ -44,6 +45,26 @@ forbody: ; preds = %forcond %shr = ashr <8 x i8> %add, < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 > ; <<8 x i8>> [#uses=1] store <8 x i8> %shr, <8 x i8>* %arrayidx10 br label %forinc +; CHECK: %forbody +; CHECK: pmovzxbw +; CHECK-NEXT: paddw +; CHECK-NEXT: psllw $8 +; CHECK-NEXT: psraw $8 +; CHECK-NEXT: psraw $2 +; CHECK-NEXT: pshufb +; CHECK-NEXT: movlpd +; +; FIXME: We shouldn't require both a movd and an insert. +; CHECK-WIDE: %forbody +; CHECK-WIDE: movd +; CHECK-WIDE-NEXT: pinsrd +; CHECK-WIDE-NEXT: paddb +; CHECK-WIDE-NEXT: psrlw $2 +; CHECK-WIDE-NEXT: pand +; CHECK-WIDE-NEXT: pxor +; CHECK-WIDE-NEXT: psubb +; CHECK-WIDE-NEXT: pextrd +; CHECK-WIDE-NEXT: movd forinc: ; preds = %forbody %tmp15 = load i32* %i ; [#uses=1] From 3b52084af899f5eaf7c24845ccaf3d4b6f87e8db Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 9 Jul 2014 10:07:36 +0000 Subject: [PATCH 0692/1155] Add ability to emit internal instruction representation to CodeGen assembly output. Summary: This patch re-uses the implementation of 'llvm-mc -show-inst' and makes it available to llc as 'llc -asm-show-inst'. This is necessary to test parts of MIPS32r6/MIPS64r6 without resorting to 'llc -filetype=obj' tests. For example, on MIPS32r2 and earlier we use the 'jr $rs' instruction for indirect branches and returns. On MIPS32r6, we no longer have 'jr $rs' and use 'jalr $zero, $rs' instead. The catch is that, on MIPS32r6, 'jr $rs' is an alias for 'jalr $zero, $rs' and is the preferred way of writing this instruction. As a result, all MIPS ISA's emit 'jr $rs' in their assembly output and the assembler encodes this to different opcodes according to the ISA. 
Using this option, we can check that the MCInst really is a JR or a JALR by matching the emitted comment. This removes the need for a 'llc -filetype=obj' test. Reviewers: rafael, dsanders Reviewed By: dsanders Subscribers: zoran.jovanovic, llvm-commits Differential Revision: http://reviews.llvm.org/D4267 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212603 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCTargetOptionsCommandFlags.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.h b/include/llvm/MC/MCTargetOptionsCommandFlags.h index 344983dec5f2..6d4eb0ef5911 100644 --- a/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -36,12 +36,17 @@ cl::opt RelaxAll("mc-relax-all", cl::opt DwarfVersion("dwarf-version", cl::desc("Dwarf version"), cl::init(0)); +cl::opt ShowMCInst("asm-show-inst", + cl::desc("Emit internal instruction representation to " + "assembly file")); + static inline MCTargetOptions InitMCTargetOptionsFromFlags() { MCTargetOptions Options; Options.SanitizeAddress = (AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress); Options.MCRelaxAll = RelaxAll; Options.DwarfVersion = DwarfVersion; + Options.ShowMCInst = ShowMCInst; return Options; } From 7c2ef822f7de4ad4fd255b9cb7e5e355c981db2f Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 9 Jul 2014 10:16:07 +0000 Subject: [PATCH 0693/1155] [mips][mips64r6] Use JALR for returns instead of JR (which is not available on MIPS32r6/MIPS64r6) Summary: RET, and RET_MM have been replaced by a pseudo named PseudoReturn. In addition a version with a 64-bit GPR named PseudoReturn64 has been added. Instruction selection for a return matches RetRA, which is expanded post register allocation to PseudoReturn/PseudoReturn64. 
During MipsAsmPrinter, these PseudoReturn/PseudoReturn64 are emitted as: - (JALR64 $zero, $rs) on MIPS64r6 - (JALR $zero, $rs) on MIPS32r6 - (JR_MM $rs) on microMIPS - (JR $rs) otherwise On MIPS32r6/MIPS64r6, 'jr $rs' is an alias for 'jalr $zero, $rs'. To aid development and review (specifically, to ensure all cases of jr are updated), these aliases are temporarily named 'r6.jr' instead of 'jr'. A follow-up patch will change them back to the correct mnemonic. Added (JALR $zero, $rs) to MipsNaClELFStreamer's definition of an indirect jump, and removed it from its definition of a call. Note: I haven't accounted for MIPS64 in MipsNaClELFStreamer since it doesn't appear to account for any MIPS64-specifics. The return instruction created as part of eh_return expansion is now expanded using expandRetRA() so we use the right return instruction on MIPS32r6/MIPS64r6 ('jalr $zero, $rs'). Also, fixed a misuse of isABI_N64() to detect 64-bit wide registers in expandEhReturn(). Reviewers: jkolek, vmedic, mseaborn, zoran.jovanovic, dsanders Reviewed By: dsanders Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D4268 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212604 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 20 ++++- lib/Target/Mips/MicroMipsInstrInfo.td | 1 - lib/Target/Mips/Mips32r6InstrInfo.td | 1 + lib/Target/Mips/Mips64InstrInfo.td | 2 + lib/Target/Mips/Mips64r6InstrInfo.td | 8 ++ lib/Target/Mips/MipsAsmPrinter.cpp | 42 ++++++++++ lib/Target/Mips/MipsAsmPrinter.h | 2 + lib/Target/Mips/MipsInstrInfo.td | 24 ++++-- lib/Target/Mips/MipsSEInstrInfo.cpp | 26 +++--- lib/Target/Mips/MipsSEInstrInfo.h | 3 +- test/CodeGen/Mips/eh-return32.ll | 14 ++-- test/CodeGen/Mips/eh-return64.ll | 17 ++-- test/CodeGen/Mips/llvm-ir/ret.ll | 83 ++++++++++++++----- 13 files changed, 183 insertions(+), 60 deletions(-) diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp 
b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index ce4b9a8f9e98..6cde8f9ae3e4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -48,7 +48,13 @@ class MipsNaClELFStreamer : public MipsELFStreamer { bool PendingCall; bool isIndirectJump(const MCInst &MI) { - return MI.getOpcode() == Mips::JR || MI.getOpcode() == Mips::RET; + if (MI.getOpcode() == Mips::JALR) { + // MIPS32r6/MIPS64r6 doesn't have a JR instruction and uses JALR instead. + // JALR is an indirect branch if the link register is $0. + assert(MI.getOperand(0).isReg()); + return MI.getOperand(0).getReg() == Mips::ZERO; + } + return MI.getOpcode() == Mips::JR; } bool isStackPointerFirstOperand(const MCInst &MI) { @@ -56,7 +62,9 @@ class MipsNaClELFStreamer : public MipsELFStreamer { && MI.getOperand(0).getReg() == Mips::SP); } - bool isCall(unsigned Opcode, bool *IsIndirectCall) { + bool isCall(const MCInst &MI, bool *IsIndirectCall) { + unsigned Opcode = MI.getOpcode(); + *IsIndirectCall = false; switch (Opcode) { @@ -71,6 +79,12 @@ class MipsNaClELFStreamer : public MipsELFStreamer { return true; case Mips::JALR: + // JALR is only a call if the link register is not $0. Otherwise it's an + // indirect branch. + assert(MI.getOperand(0).isReg()); + if (MI.getOperand(0).getReg() == Mips::ZERO) + return false; + *IsIndirectCall = true; return true; } @@ -154,7 +168,7 @@ class MipsNaClELFStreamer : public MipsELFStreamer { // Sandbox calls by aligning call and branch delay to the bundle end. // For indirect calls, emit the mask before the call. 
bool IsIndirectCall; - if (isCall(Inst.getOpcode(), &IsIndirectCall)) { + if (isCall(Inst, &IsIndirectCall)) { if (PendingCall) report_fatal_error("Dangerous instruction in branch delay slot!"); diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 9904bc690c23..87a3a3e29ca2 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -246,7 +246,6 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { } def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>; def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>; - def RET_MM : MMRel, RetBase<"ret", GPR32Opnd>, JR_FM_MM<0x3c>; /// Branch Instructions def BEQ_MM : MMRel, CBranch<"beq", brtarget_mm, seteq, GPR32Opnd>, diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 886f1aaaef51..ebe014a53566 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -733,6 +733,7 @@ def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6; //===----------------------------------------------------------------------===// def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6; +def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6; //===----------------------------------------------------------------------===// // diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 2b9cda7ac83c..3b8f9e3a21e5 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -186,6 +186,8 @@ def JALR64Pseudo : JumpLinkRegPseudo; def TAILCALL64_R : TailCallReg; } +def PseudoReturn64 : PseudoReturnBase; + /// Multiply and Divide Instructions. 
def DMULT : Mult<"dmult", II_DMULT, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1c>, ISA_MIPS3_NOT_32R6_64R6; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index 75560f41bc13..5452175f2586 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -105,6 +105,14 @@ let DecoderNamespace = "Mips32r6_64r6_GP64" in { def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64; } +//===----------------------------------------------------------------------===// +// +// Instruction Aliases +// +//===----------------------------------------------------------------------===// + +def : MipsInstAlias<"jr $rs", (JALR64 ZERO_64, GPR64Opnd:$rs), 1>, ISA_MIPS64R6; + //===----------------------------------------------------------------------===// // // Patterns and Pseudo Instructions diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 16db9ac53245..60ec0e2eca53 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -91,6 +91,42 @@ bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { #include "MipsGenMCPseudoLowering.inc" +void MipsAsmPrinter::emitPseudoReturn(MCStreamer &OutStreamer, + const MachineInstr *MI) { + // Lower PseudoReturn to JR, JR_MM, JALR, or JALR64 as appropriate for the + // target + bool HasLinkReg = false; + MCInst TmpInst0; + + if (Subtarget->hasMips64r6()) { + // MIPS64r6 should use (JALR64 ZERO_64, $rs) + TmpInst0.setOpcode(Mips::JALR64); + HasLinkReg = true; + } else if (Subtarget->hasMips32r6()) { + // MIPS32r6 should use (JALR ZERO, $rs) + TmpInst0.setOpcode(Mips::JALR); + HasLinkReg = true; + } else if (Subtarget->inMicroMipsMode()) + // microMIPS should use (JR_MM $rs) + TmpInst0.setOpcode(Mips::JR_MM); + else { + // Everything else should use (JR $rs) + TmpInst0.setOpcode(Mips::JR); + } + + MCOperand MCOp; + + if (HasLinkReg) { + unsigned ZeroReg = Subtarget->isGP64bit() 
? Mips::ZERO_64 : Mips::ZERO; + TmpInst0.addOperand(MCOperand::CreateReg(ZeroReg)); + } + + lowerOperand(MI->getOperand(0), MCOp); + TmpInst0.addOperand(MCOp); + + EmitToStreamer(OutStreamer, TmpInst0); +} + void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { MipsTargetStreamer &TS = getTargetStreamer(); TS.setCanHaveModuleDir(false); @@ -144,6 +180,12 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (emitPseudoExpansionLowering(OutStreamer, &*I)) continue; + if (I->getOpcode() == Mips::PseudoReturn || + I->getOpcode() == Mips::PseudoReturn64) { + emitPseudoReturn(OutStreamer, &*I); + continue; + } + // The inMips16Mode() test is not permanent. // Some instructions are marked as pseudo right now which // would make the test fail for the wrong reason but diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index e82b145925d7..b0f623305924 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -40,6 +40,8 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); + void emitPseudoReturn(MCStreamer &OutStreamer, const MachineInstr *MI); + // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 684199ba9b9b..895cf4db582a 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -749,14 +749,6 @@ class IndirectBranch : let isIndirectBranch = 1; } -// Return instruction -class RetBase: JumpFR { - let isReturn = 1; - let isCodeGenOnly = 1; - let hasCtrlDep = 1; - let hasExtraSrcRegAllocReq = 1; -} - // Jump and Link (Call) let isCall=1, hasDelaySlot=1, Defs = [RA] in { class JumpLink : @@ -1229,7 +1221,21 @@ def BAL_BR : BAL_BR_Pseudo; def TAILCALL : TailCall; def TAILCALL_R : TailCallReg; -def RET : MMRel, RetBase<"ret", GPR32Opnd>, MTLO_FM<8>; +// Return instruction +// RetRA is expanded into this after register allocation and then MipsAsmPrinter +// expands this into JR, or JALR depending on the ISA. +class PseudoReturnBase : MipsPseudo<(outs), (ins RO:$rs), + [], IIBranch> { + let isTerminator = 1; + let isBarrier = 1; + let hasDelaySlot = 1; + let isReturn = 1; + let isCodeGenOnly = 1; + let hasCtrlDep = 1; + let hasExtraSrcRegAllocReq = 1; +} + +def PseudoReturn : PseudoReturnBase; // Exception handling related node and instructions. // The conversion sequence is: diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index e82c8cff7225..32da7492dad4 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -272,7 +272,7 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { default: return false; case Mips::RetRA: - expandRetRA(MBB, MI, Mips::RET); + expandRetRA(MBB, MI); break; case Mips::PseudoMFHI: Opc = isMicroMips ? 
Mips::MFHI16_MM : Mips::MFHI; @@ -428,9 +428,14 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { } void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned Opc) const { - BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA); + MachineBasicBlock::iterator I) const { + const auto &Subtarget = TM.getSubtarget(); + + if (Subtarget.isGP64bit()) + BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64)) + .addReg(Mips::RA_64); + else + BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA); } std::pair @@ -591,17 +596,16 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, // indirect jump to TargetReg const MipsSubtarget &STI = TM.getSubtarget(); unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR; - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA; - unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned SP = STI.isGP64bit() ? Mips::SP_64 : Mips::SP; + unsigned RA = STI.isGP64bit() ? Mips::RA_64 : Mips::RA; + unsigned T9 = STI.isGP64bit() ? Mips::T9_64 : Mips::T9; + unsigned ZERO = STI.isGP64bit() ? 
Mips::ZERO_64 : Mips::ZERO; unsigned OffsetReg = I->getOperand(0).getReg(); unsigned TargetReg = I->getOperand(1).getReg(); // addu $ra, $v0, $zero // addu $sp, $sp, $v1 - // jr $ra + // jr $ra (via RetRA) if (TM.getRelocationModel() == Reloc::PIC_) BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), T9) .addReg(TargetReg).addReg(ZERO); @@ -609,7 +613,7 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, .addReg(TargetReg).addReg(ZERO); BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP) .addReg(SP).addReg(OffsetReg); - BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA); + expandRetRA(MBB, I); } const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) { diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index aa68552066c3..9ac94ce38f66 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -81,8 +81,7 @@ class MipsSEInstrInfo : public MipsInstrInfo { private: unsigned getAnalyzableBrOpc(unsigned Opc) const override; - void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned Opc) const; + void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; std::pair compareOpndSize(unsigned Opc, const MachineFunction &MF) const; diff --git a/test/CodeGen/Mips/eh-return32.ll b/test/CodeGen/Mips/eh-return32.ll index c3003b34b162..748050c4d34b 100644 --- a/test/CodeGen/Mips/eh-return32.ll +++ b/test/CodeGen/Mips/eh-return32.ll @@ -1,4 +1,6 @@ -; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s +; RUN: llc -march=mipsel -mcpu=mips32 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6 +; RUN: llc -march=mipsel -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=NOT-R6 +; RUN: llc -march=mipsel -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=CHECK -check-prefix=R6 declare void @llvm.eh.return.i32(i32, i8*) declare 
void @foo(...) @@ -9,7 +11,7 @@ entry: call void @llvm.eh.return.i32(i32 %offset, i8* %handler) unreachable -; CHECK: f1 +; CHECK: f1: ; CHECK: addiu $sp, $sp, -[[spoffset:[0-9]+]] ; check that $a0-$a3 are saved on stack. @@ -41,7 +43,8 @@ entry: ; CHECK: addiu $sp, $sp, [[spoffset]] ; CHECK: move $25, $2 ; CHECK: move $ra, $2 -; CHECK: jr $ra +; NOT-R6: jr $ra # Date: Wed, 9 Jul 2014 10:21:59 +0000 Subject: [PATCH 0694/1155] [mips][mips64r6] Use JALR for indirect branches instead of JR (which is not available on MIPS32r6/MIPS64r6) Summary: This completes the change to use JALR instead of JR on MIPS32r6/MIPS64r6. Reviewers: jkolek, vmedic, zoran.jovanovic, dsanders Reviewed By: dsanders Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D4269 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212605 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 8 +++++--- lib/Target/Mips/Mips64InstrInfo.td | 21 +++++++++++---------- lib/Target/Mips/MipsAsmPrinter.cpp | 14 ++++++++------ lib/Target/Mips/MipsAsmPrinter.h | 6 +++++- lib/Target/Mips/MipsInstrInfo.td | 23 ++++++++++++++++++----- test/CodeGen/Mips/llvm-ir/call.ll | 12 +++++++++--- test/CodeGen/Mips/llvm-ir/indirectbr.ll | 23 +++++++++++++++-------- 7 files changed, 71 insertions(+), 36 deletions(-) diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 11166c45a880..5e4eebb62c12 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -1370,9 +1370,11 @@ def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)), (Jal16 texternalsym:$dst)>; // Indirect branch -def: Mips16Pat< - (brind CPU16Regs:$rs), - (JrcRx16 CPU16Regs:$rs)>; +def: Mips16Pat<(brind CPU16Regs:$rs), (JrcRx16 CPU16Regs:$rs)> { + // Ensure that the addition of MIPS32r6/MIPS64r6 support does not change + // MIPS16's behaviour. 
+ let AddedComplexity = 1; +} // Jump and Link (Call) let isCall=1, hasDelaySlot=0 in diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 3b8f9e3a21e5..f0b6814e37cc 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -174,19 +174,20 @@ def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3_NOT_32R6_64R6; /// Jump and Branch Instructions let isCodeGenOnly = 1 in { -def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>; -def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>; -def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>; -def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>; -def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>; -def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>; -def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>; -def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM; -def JALR64Pseudo : JumpLinkRegPseudo; -def TAILCALL64_R : TailCallReg; + def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>; + def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>; + def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>; + def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>; + def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>; + def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>; + def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>; + def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM; + def JALR64Pseudo : JumpLinkRegPseudo; + def TAILCALL64_R : TailCallReg; } def PseudoReturn64 : PseudoReturnBase; +def PseudoIndirectBranch64 : PseudoIndirectBranchBase; /// Multiply and Divide Instructions. 
def DMULT : Mult<"dmult", II_DMULT, GPR64Opnd, [HI0_64, LO0_64]>, diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 60ec0e2eca53..4d4fcd96a4fe 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -91,10 +91,10 @@ bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { #include "MipsGenMCPseudoLowering.inc" -void MipsAsmPrinter::emitPseudoReturn(MCStreamer &OutStreamer, - const MachineInstr *MI) { - // Lower PseudoReturn to JR, JR_MM, JALR, or JALR64 as appropriate for the - // target +// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to JR, JR_MM, +// JALR, or JALR64 as appropriate for the target +void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer, + const MachineInstr *MI) { bool HasLinkReg = false; MCInst TmpInst0; @@ -181,8 +181,10 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { continue; if (I->getOpcode() == Mips::PseudoReturn || - I->getOpcode() == Mips::PseudoReturn64) { - emitPseudoReturn(OutStreamer, &*I); + I->getOpcode() == Mips::PseudoReturn64 || + I->getOpcode() == Mips::PseudoIndirectBranch || + I->getOpcode() == Mips::PseudoIndirectBranch64) { + emitPseudoIndirectBranch(OutStreamer, &*I); continue; } diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index b0f623305924..967aa0b16416 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -40,7 +40,11 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); - void emitPseudoReturn(MCStreamer &OutStreamer, const MachineInstr *MI); + // Emit PseudoReturn, PseudoReturn64, PseudoIndirectBranch, + // and PseudoIndirectBranch64 as a JR, JR_MM, JALR, or JALR64 as appropriate + // for the target. 
+ void emitPseudoIndirectBranch(MCStreamer &OutStreamer, + const MachineInstr *MI); // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 895cf4db582a..d3dd87836c01 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -743,8 +743,7 @@ class JumpFR; // Indirect branch -class IndirectBranch : - JumpFR { +class IndirectBranch : JumpFR { let isBranch = 1; let isIndirectBranch = 1; } @@ -1221,9 +1220,23 @@ def BAL_BR : BAL_BR_Pseudo; def TAILCALL : TailCall; def TAILCALL_R : TailCallReg; -// Return instruction -// RetRA is expanded into this after register allocation and then MipsAsmPrinter -// expands this into JR, or JALR depending on the ISA. +// Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64 +// then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA. +class PseudoIndirectBranchBase : + MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], IIBranch> { + let isTerminator=1; + let isBarrier=1; + let hasDelaySlot = 1; + let isBranch = 1; + let isIndirectBranch = 1; +} + +def PseudoIndirectBranch : PseudoIndirectBranchBase; + +// Return instructions are matched as a RetRA instruction, then ar expanded +// into PseudoReturn/PseudoReturn64 after register allocation. Finally, +// MipsAsmPrinter expands this into JR, JR64, JALR, or JALR64 depending on the +// ISA. 
class PseudoReturnBase : MipsPseudo<(outs), (ins RO:$rs), [], IIBranch> { let isTerminator = 1; diff --git a/test/CodeGen/Mips/llvm-ir/call.ll b/test/CodeGen/Mips/llvm-ir/call.ll index 0752fc7ac7db..4cbf43cae28e 100644 --- a/test/CodeGen/Mips/llvm-ir/call.ll +++ b/test/CodeGen/Mips/llvm-ir/call.ll @@ -3,8 +3,11 @@ ; FIXME: We should remove the need for -enable-mips-tail-calls ; RUN: llc -march=mips -mcpu=mips32 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 ; RUN: llc -march=mips -mcpu=mips32r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 +; RUN: llc -march=mips -mcpu=mips32r6 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 ; RUN: llc -march=mips64 -mcpu=mips4 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 ; RUN: llc -march=mips64 -mcpu=mips64 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 +; RUN: llc -march=mips64 -mcpu=mips64r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 +; RUN: llc -march=mips64 -mcpu=mips64r6 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 declare void @extern_void_void() declare i32 @extern_i32_void() @@ -63,7 +66,8 @@ define void @musttail_call_void_void() { ; N64: ld $[[TGT:[0-9]+]], %call16(extern_void_void)($gp) -; ALL: jr $[[TGT]] +; NOT-R6: jr $[[TGT]] +; R6: r6.jr $[[TGT]] musttail call void @extern_void_void() ret void @@ -76,7 +80,8 @@ define i32 @musttail_call_i32_void() { ; N64: ld $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp) -; ALL: jr $[[TGT]] +; NOT-R6: jr $[[TGT]] +; R6: r6.jr $[[TGT]] %1 = musttail call i32 @extern_i32_void() ret i32 %1 @@ -89,7 +94,8 @@ define float @musttail_call_float_void() { ; N64: ld $[[TGT:[0-9]+]], %call16(extern_float_void)($gp) -; ALL: jr $[[TGT]] +; NOT-R6: jr $[[TGT]] +; R6: r6.jr $[[TGT]] %1 = musttail call float @extern_float_void() ret float %1 diff --git 
a/test/CodeGen/Mips/llvm-ir/indirectbr.ll b/test/CodeGen/Mips/llvm-ir/indirectbr.ll index 49fbb004871d..d8fd78774553 100644 --- a/test/CodeGen/Mips/llvm-ir/indirectbr.ll +++ b/test/CodeGen/Mips/llvm-ir/indirectbr.ll @@ -1,19 +1,26 @@ ; Test all important variants of the unconditional 'br' instruction. -; RUN: llc -march=mips -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -; RUN: llc -march=mips64 -mcpu=mips4 < %s | FileCheck %s -check-prefix=ALL -; RUN: llc -march=mips64 -mcpu=mips64 < %s | FileCheck %s -check-prefix=ALL -; RUN: llc -march=mips64 -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL +; RUN: llc -march=mips -mcpu=mips32 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6 +; RUN: llc -march=mips -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6 +; RUN: llc -march=mips -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=R6 +; RUN: llc -march=mips64 -mcpu=mips4 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6 +; RUN: llc -march=mips64 -mcpu=mips64 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6 +; RUN: llc -march=mips64 -mcpu=mips64r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6 +; RUN: llc -march=mips64 -mcpu=mips64r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=R6 define i32 @br(i8 *%addr) { ; ALL-LABEL: br: -; ALL: jr $4 +; NOT-R6: jr $4 # Date: Wed, 9 Jul 2014 10:36:42 +0000 Subject: [PATCH 0695/1155] [x86] Initialize a pointer to null to fix a bug in r212602. This should restore GCC hosts (which happen to put the bad stuff into the pointer) and MSan, etc. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212606 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index daf141bfcec1..188a0befcbdb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -20385,7 +20385,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // The right side has to be a 'trunc' or a constant vector. bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE; - ConstantSDNode *RHSConstSplat; + ConstantSDNode *RHSConstSplat = nullptr; if (auto *RHSBV = dyn_cast(N1)) RHSConstSplat = RHSBV->getConstantSplatNode(); if (!RHSTrunc && !RHSConstSplat) From 388704618eb783806077fe53c7fa1126d25ff897 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 9 Jul 2014 10:40:20 +0000 Subject: [PATCH 0696/1155] [mips][mips64r6] Correct cond names in the cmp.cond.[ds] instructions Summary: It seems we accidentally read the wrong column of the table MIPS64r6 spec and used the names for c.cond.fmt instead of cmp.cond.fmt. 
Differential Revision: http://reviews.llvm.org/D4387 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212607 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips32r6InstrFormats.td | 21 +++++---- lib/Target/Mips/Mips32r6InstrInfo.td | 62 ++++++++++++------------- test/CodeGen/Mips/analyzebranch.ll | 2 +- test/CodeGen/Mips/fcmp.ll | 32 ++++++------- test/CodeGen/Mips/select.ll | 40 ++++++++-------- test/MC/Disassembler/Mips/mips32r6.txt | 40 ++++++++-------- test/MC/Disassembler/Mips/mips64r6.txt | 40 ++++++++-------- test/MC/Mips/mips32r6/valid.s | 40 ++++++++-------- test/MC/Mips/mips64r6/valid.s | 40 ++++++++-------- 9 files changed, 159 insertions(+), 158 deletions(-) diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td index d09a3a0fcabc..e4ec96a92f5b 100644 --- a/lib/Target/Mips/Mips32r6InstrFormats.td +++ b/lib/Target/Mips/Mips32r6InstrFormats.td @@ -108,22 +108,23 @@ def FIELD_FMT_D : FIELD_FMT<0b10001>; class FIELD_CMP_COND Val> { bits<5> Value = Val; } -def FIELD_CMP_COND_F : FIELD_CMP_COND<0b00000>; +// Note: The CMP_COND_FMT names differ from the C_COND_FMT names. 
+def FIELD_CMP_COND_AF : FIELD_CMP_COND<0b00000>; def FIELD_CMP_COND_UN : FIELD_CMP_COND<0b00001>; def FIELD_CMP_COND_EQ : FIELD_CMP_COND<0b00010>; def FIELD_CMP_COND_UEQ : FIELD_CMP_COND<0b00011>; -def FIELD_CMP_COND_OLT : FIELD_CMP_COND<0b00100>; +def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b00100>; def FIELD_CMP_COND_ULT : FIELD_CMP_COND<0b00101>; -def FIELD_CMP_COND_OLE : FIELD_CMP_COND<0b00110>; +def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b00110>; def FIELD_CMP_COND_ULE : FIELD_CMP_COND<0b00111>; -def FIELD_CMP_COND_SF : FIELD_CMP_COND<0b01000>; -def FIELD_CMP_COND_NGLE : FIELD_CMP_COND<0b01001>; +def FIELD_CMP_COND_SAF : FIELD_CMP_COND<0b01000>; +def FIELD_CMP_COND_SUN : FIELD_CMP_COND<0b01001>; def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0b01010>; -def FIELD_CMP_COND_NGL : FIELD_CMP_COND<0b01011>; -def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b01100>; -def FIELD_CMP_COND_NGE : FIELD_CMP_COND<0b01101>; -def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b01110>; -def FIELD_CMP_COND_NGT : FIELD_CMP_COND<0b01111>; +def FIELD_CMP_COND_SUEQ : FIELD_CMP_COND<0b01011>; +def FIELD_CMP_COND_SLT : FIELD_CMP_COND<0b01100>; +def FIELD_CMP_COND_SULT : FIELD_CMP_COND<0b01101>; +def FIELD_CMP_COND_SLE : FIELD_CMP_COND<0b01110>; +def FIELD_CMP_COND_SULE : FIELD_CMP_COND<0b01111>; class FIELD_CMP_FORMAT Val> { bits<5> Value = Val; diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index ebe014a53566..bf60685e6377 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -186,8 +186,8 @@ class CMP_CONDN_DESC_BASE{ - def CMP_F_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"f", Typestr, FGROpnd>, + def CMP_F_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, ISA_MIPS32R6; def CMP_UN_#NAME : COP1_CMP_CONDN_FM, CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>, @@ -198,42 +198,42 @@ multiclass CMP_CC_M , CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>, ISA_MIPS32R6; - def CMP_OLT_#NAME : 
COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"olt", Typestr, FGROpnd, setolt>, - ISA_MIPS32R6; + def CMP_LT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>, + ISA_MIPS32R6; def CMP_ULT_#NAME : COP1_CMP_CONDN_FM, CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>, ISA_MIPS32R6; - def CMP_OLE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ole", Typestr, FGROpnd, setole>, - ISA_MIPS32R6; + def CMP_LE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>, + ISA_MIPS32R6; def CMP_ULE_#NAME : COP1_CMP_CONDN_FM, CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>, ISA_MIPS32R6; - def CMP_SF_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sf", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_NGLE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ngle", Typestr, FGROpnd>, - ISA_MIPS32R6; + def CMP_SAF_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_SUN_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, + ISA_MIPS32R6; def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM, CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, ISA_MIPS32R6; - def CMP_NGL_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ngl", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_LT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_NGE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"nge", Typestr, FGROpnd>, + def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_SLT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, ISA_MIPS32R6; - def CMP_LE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_NGT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ngt", Typestr, FGROpnd>, + def CMP_SULT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_SLE_#NAME : 
COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, ISA_MIPS32R6; + def CMP_SULE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, + ISA_MIPS32R6; } //===----------------------------------------------------------------------===// @@ -754,9 +754,9 @@ def : MipsPat<(setgt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$rhs, f32:$lhs)>, ISA_MIPS32R6; def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>, ISA_MIPS32R6; -def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_OLT_S f32:$lhs, f32:$rhs)>, +def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>, ISA_MIPS32R6; -def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_OLE_S f32:$lhs, f32:$rhs)>, +def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>, ISA_MIPS32R6; def : MipsPat<(setne f32:$lhs, f32:$rhs), (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; @@ -774,9 +774,9 @@ def : MipsPat<(setgt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$rhs, f64:$lhs)>, ISA_MIPS32R6; def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>, ISA_MIPS32R6; -def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_OLT_D f64:$lhs, f64:$rhs)>, +def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>, ISA_MIPS32R6; -def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_OLE_D f64:$lhs, f64:$rhs)>, +def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>, ISA_MIPS32R6; def : MipsPat<(setne f64:$lhs, f64:$rhs), (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll index d9ad0f8ad86a..4b5d09778d79 100644 --- a/test/CodeGen/Mips/analyzebranch.ll +++ b/test/CodeGen/Mips/analyzebranch.ll @@ -16,7 +16,7 @@ entry: ; 32-GPR: mtc1 $zero, $[[Z:f[0-9]]] ; 32-GPR: mthc1 $zero, $[[Z:f[0-9]]] ; 64-GPR: dmtc1 $zero, $[[Z:f[0-9]]] -; GPR: cmp.olt.d $[[FGRCC:f[0-9]+]], $[[Z]], $f12 +; GPR: cmp.lt.d $[[FGRCC:f[0-9]+]], $[[Z]], $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], 
$[[FGRCC]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] ; GPR: bnez $[[GPRCC]], $BB diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll index 27246fe20bb2..dce8a7d6da58 100644 --- a/test/CodeGen/Mips/fcmp.ll +++ b/test/CodeGen/Mips/fcmp.ll @@ -52,10 +52,10 @@ define i32 @ogt_f32(float %a, float %b) nounwind { ; 64-C-DAG: c.ule.s $f12, $f13 ; 64-C-DAG: movf $[[T0]], $1, $fcc0 -; 32-CMP-DAG: cmp.olt.s $[[T0:f[0-9]+]], $f14, $f12 +; 32-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $2, $[[T0]] -; 64-CMP-DAG: cmp.olt.s $[[T0:f[0-9]+]], $f13, $f12 +; 64-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f13, $f12 ; 64-CMP-DAG: mfc1 $2, $[[T0]] %1 = fcmp ogt float %a, %b @@ -76,10 +76,10 @@ define i32 @oge_f32(float %a, float %b) nounwind { ; 64-C-DAG: c.ult.s $f12, $f13 ; 64-C-DAG: movf $[[T0]], $1, $fcc0 -; 32-CMP-DAG: cmp.ole.s $[[T0:f[0-9]+]], $f14, $f12 +; 32-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $2, $[[T0]] -; 64-CMP-DAG: cmp.ole.s $[[T0:f[0-9]+]], $f13, $f12 +; 64-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f13, $f12 ; 64-CMP-DAG: mfc1 $2, $[[T0]] %1 = fcmp oge float %a, %b @@ -100,10 +100,10 @@ define i32 @olt_f32(float %a, float %b) nounwind { ; 64-C-DAG: c.olt.s $f12, $f13 ; 64-C-DAG: movt $[[T0]], $1, $fcc0 -; 32-CMP-DAG: cmp.olt.s $[[T0:f[0-9]+]], $f12, $f14 +; 32-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $2, $[[T0]] -; 64-CMP-DAG: cmp.olt.s $[[T0:f[0-9]+]], $f12, $f13 +; 64-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $2, $[[T0]] %1 = fcmp olt float %a, %b @@ -124,10 +124,10 @@ define i32 @ole_f32(float %a, float %b) nounwind { ; 64-C-DAG: c.ole.s $f12, $f13 ; 64-C-DAG: movt $[[T0]], $1, $fcc0 -; 32-CMP-DAG: cmp.ole.s $[[T0:f[0-9]+]], $f12, $f14 +; 32-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $2, $[[T0]] -; 64-CMP-DAG: cmp.ole.s $[[T0:f[0-9]+]], $f12, $f13 +; 64-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $2, $[[T0]] %1 = fcmp ole float %a, %b 
@@ -412,10 +412,10 @@ define i32 @ogt_f64(double %a, double %b) nounwind { ; 64-C-DAG: c.ule.d $f12, $f13 ; 64-C-DAG: movf $[[T0]], $1, $fcc0 -; 32-CMP-DAG: cmp.olt.d $[[T0:f[0-9]+]], $f14, $f12 +; 32-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $2, $[[T0]] -; 64-CMP-DAG: cmp.olt.d $[[T0:f[0-9]+]], $f13, $f12 +; 64-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f13, $f12 ; 64-CMP-DAG: mfc1 $2, $[[T0]] %1 = fcmp ogt double %a, %b @@ -436,10 +436,10 @@ define i32 @oge_f64(double %a, double %b) nounwind { ; 64-C-DAG: c.ult.d $f12, $f13 ; 64-C-DAG: movf $[[T0]], $1, $fcc0 -; 32-CMP-DAG: cmp.ole.d $[[T0:f[0-9]+]], $f14, $f12 +; 32-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: mfc1 $2, $[[T0]] -; 64-CMP-DAG: cmp.ole.d $[[T0:f[0-9]+]], $f13, $f12 +; 64-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f13, $f12 ; 64-CMP-DAG: mfc1 $2, $[[T0]] %1 = fcmp oge double %a, %b @@ -460,10 +460,10 @@ define i32 @olt_f64(double %a, double %b) nounwind { ; 64-C-DAG: c.olt.d $f12, $f13 ; 64-C-DAG: movt $[[T0]], $1, $fcc0 -; 32-CMP-DAG: cmp.olt.d $[[T0:f[0-9]+]], $f12, $f14 +; 32-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $2, $[[T0]] -; 64-CMP-DAG: cmp.olt.d $[[T0:f[0-9]+]], $f12, $f13 +; 64-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $2, $[[T0]] %1 = fcmp olt double %a, %b @@ -484,10 +484,10 @@ define i32 @ole_f64(double %a, double %b) nounwind { ; 64-C-DAG: c.ole.d $f12, $f13 ; 64-C-DAG: movt $[[T0]], $1, $fcc0 -; 32-CMP-DAG: cmp.ole.d $[[T0:f[0-9]+]], $f12, $f14 +; 32-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $2, $[[T0]] -; 64-CMP-DAG: cmp.ole.d $[[T0:f[0-9]+]], $f12, $f13 +; 64-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $2, $[[T0]] %1 = fcmp ole double %a, %b diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll index 3319fd8b22aa..d4a75bc42ffb 100644 --- a/test/CodeGen/Mips/select.ll +++ b/test/CodeGen/Mips/select.ll @@ -252,7 +252,7 @@ entry: ; 32R6-DAG: mtc1 $6, 
$[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] -; 32R6: cmp.olt.s $[[CC:f0]], $[[F2]], $[[F3]] +; 32R6: cmp.lt.s $[[CC:f0]], $[[F2]], $[[F3]] ; 32R6: sel.s $[[CC]], $f14, $f12 ; 64: c.olt.s $f14, $f15 @@ -263,7 +263,7 @@ entry: ; 64R2: movt.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 -; 64R6: cmp.olt.s $[[CC:f0]], $f14, $f15 +; 64R6: cmp.lt.s $[[CC:f0]], $f14, $f15 ; 64R6: sel.s $[[CC]], $f13, $f12 %cmp = fcmp olt float %f2, %f3 @@ -289,7 +289,7 @@ entry: ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] -; 32R6: cmp.olt.s $[[CC:f0]], $[[F3]], $[[F2]] +; 32R6: cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.s $[[CC]], $f14, $f12 ; 64: c.ule.s $f14, $f15 @@ -300,7 +300,7 @@ entry: ; 64R2: movf.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 -; 64R6: cmp.olt.s $[[CC:f0]], $f15, $f14 +; 64R6: cmp.lt.s $[[CC:f0]], $f15, $f14 ; 64R6: sel.s $[[CC]], $f13, $f12 %cmp = fcmp ogt float %f2, %f3 @@ -326,7 +326,7 @@ entry: ; 32R6-DAG: lwc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: lwc1 $[[F3:f[0-9]+]], 20($sp) -; 32R6: cmp.olt.s $[[CC:f0]], $[[F3]], $[[F2]] +; 32R6: cmp.lt.s $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.d $[[CC]], $f14, $f12 ; 64: c.ule.s $f14, $f15 @@ -337,7 +337,7 @@ entry: ; 64R2: movf.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 -; 64R6: cmp.olt.s $[[CC:f0]], $f15, $f14 +; 64R6: cmp.lt.s $[[CC:f0]], $f15, $f14 ; 64R6: sel.d $[[CC]], $f13, $f12 %cmp = fcmp ogt float %f2, %f3 @@ -400,7 +400,7 @@ entry: ; 32R6-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 32R6-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) -; 32R6: cmp.olt.d $[[CC:f0]], $[[F2]], $[[F3]] +; 32R6: cmp.lt.d $[[CC:f0]], $[[F2]], $[[F3]] ; 32R6: sel.d $[[CC]], $f14, $f12 ; 64: c.olt.d $f14, $f15 @@ -411,7 +411,7 @@ entry: ; 64R2: movt.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 -; 64R6: cmp.olt.d $[[CC:f0]], $f14, $f15 +; 64R6: cmp.lt.d $[[CC:f0]], $f14, $f15 ; 64R6: sel.d $[[CC]], $f13, $f12 %cmp = fcmp olt double %f2, %f3 @@ -437,7 +437,7 @@ entry: ; 32R6-DAG: ldc1 $[[F2:f[0-9]+]], 16($sp) ; 
32R6-DAG: ldc1 $[[F3:f[0-9]+]], 24($sp) -; 32R6: cmp.olt.d $[[CC:f0]], $[[F3]], $[[F2]] +; 32R6: cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.d $[[CC]], $f14, $f12 ; 64: c.ule.d $f14, $f15 @@ -448,7 +448,7 @@ entry: ; 64R2: movf.d $f13, $f12, $fcc0 ; 64R2: mov.d $f0, $f13 -; 64R6: cmp.olt.d $[[CC:f0]], $f15, $f14 +; 64R6: cmp.lt.d $[[CC:f0]], $f15, $f14 ; 64R6: sel.d $[[CC]], $f13, $f12 %cmp = fcmp ogt double %f2, %f3 @@ -477,7 +477,7 @@ entry: ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mthc1 $7, $[[F2]] ; 32R6-DAG: ldc1 $[[F3:f[0-9]+]], 16($sp) -; 32R6: cmp.olt.d $[[CC:f0]], $[[F3]], $[[F2]] +; 32R6: cmp.lt.d $[[CC:f0]], $[[F3]], $[[F2]] ; 32R6: sel.s $[[CC]], $f14, $f12 ; 64: c.ule.d $f14, $f15 @@ -488,7 +488,7 @@ entry: ; 64R2: movf.s $f13, $f12, $fcc0 ; 64R2: mov.s $f0, $f13 -; 64R6: cmp.olt.d $[[CC:f0]], $f15, $f14 +; 64R6: cmp.lt.d $[[CC:f0]], $f15, $f14 ; 64R6: sel.s $[[CC]], $f13, $f12 %cmp = fcmp ogt double %f2, %f3 @@ -561,7 +561,7 @@ entry: ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] -; 32R6: cmp.olt.s $[[CC:f[0-9]+]], $[[F2]], $[[F3]] +; 32R6: cmp.lt.s $[[CC:f[0-9]+]], $[[F2]], $[[F3]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] ; FIXME: This move is redundant @@ -577,7 +577,7 @@ entry: ; 64R2: movt $5, $4, $fcc0 ; 64R2: move $2, $5 -; 64R6: cmp.olt.s $[[CC:f[0-9]+]], $f14, $f15 +; 64R6: cmp.lt.s $[[CC:f[0-9]+]], $f14, $f15 ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] ; FIXME: This move is redundant @@ -607,7 +607,7 @@ entry: ; 32R6-DAG: mtc1 $6, $[[F2:f[0-9]+]] ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] -; 32R6: cmp.olt.s $[[CC:f[0-9]+]], $[[F3]], $[[F2]] +; 32R6: cmp.lt.s $[[CC:f[0-9]+]], $[[F3]], $[[F2]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] ; FIXME: This move is redundant @@ -623,7 +623,7 @@ entry: ; 64R2: movf $5, $4, $fcc0 ; 64R2: move $2, $5 -; 64R6: cmp.olt.s $[[CC:f[0-9]+]], $f15, 
$f14 +; 64R6: cmp.lt.s $[[CC:f[0-9]+]], $f15, $f14 ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] ; FIXME: This move is redundant @@ -745,7 +745,7 @@ entry: ; 32R6-DAG: ldc1 $[[TMP:f[0-9]+]], 0($[[D2]]) ; 32R6-DAG: lw $[[D3:[0-9]+]], %got(d3)($1) ; 32R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) -; 32R6: cmp.olt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]] +; 32R6: cmp.lt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] ; FIXME: This move is redundant @@ -779,7 +779,7 @@ entry: ; 64R6-DAG: ldc1 $[[TMP:f[0-9]+]], 0($[[D2]]) ; 64R6-DAG: ld $[[D3:[0-9]+]], %got_disp(d3)($1) ; 64R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) -; 64R6: cmp.olt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]] +; 64R6: cmp.lt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]] ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] ; FIXME: This move is redundant @@ -824,7 +824,7 @@ entry: ; 32R6-DAG: ldc1 $[[TMP:f[0-9]+]], 0($[[D2]]) ; 32R6-DAG: lw $[[D3:[0-9]+]], %got(d3)($1) ; 32R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) -; 32R6: cmp.olt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]] +; 32R6: cmp.lt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] ; FIXME: This move is redundant @@ -858,7 +858,7 @@ entry: ; 64R6-DAG: ldc1 $[[TMP:f[0-9]+]], 0($[[D2]]) ; 64R6-DAG: ld $[[D3:[0-9]+]], %got_disp(d3)($1) ; 64R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) -; 64R6: cmp.olt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]] +; 64R6: cmp.lt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]] ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] ; FIXME: This move is redundant diff --git a/test/MC/Disassembler/Mips/mips32r6.txt b/test/MC/Disassembler/Mips/mips32r6.txt index 35ddd56b0afa..afef8ada152b 100644 --- a/test/MC/Disassembler/Mips/mips32r6.txt +++ b/test/MC/Disassembler/Mips/mips32r6.txt @@ -50,38 +50,38 @@ 0x20 
0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4 0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4 0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4 -0x46 0x84 0x18 0x80 # CHECK: cmp.f.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x80 # CHECK: cmp.f.d $f2, $f3, $f4 +0x46 0x84 0x18 0x80 # CHECK: cmp.af.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4 0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4 0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4 0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4 0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4 0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4 0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4 -0x46 0x84 0x18 0x84 # CHECK: cmp.olt.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x84 # CHECK: cmp.olt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x84 # CHECK: cmp.lt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4 0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4 0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4 -0x46 0x84 0x18 0x86 # CHECK: cmp.ole.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x86 # CHECK: cmp.ole.d $f2, $f3, $f4 +0x46 0x84 0x18 0x86 # CHECK: cmp.le.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4 0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4 0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4 -0x46 0x84 0x18 0x88 # CHECK: cmp.sf.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x88 # CHECK: cmp.sf.d $f2, $f3, $f4 -0x46 0x84 0x18 0x89 # CHECK: cmp.ngle.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x89 # CHECK: cmp.ngle.d $f2, $f3, $f4 +0x46 0x84 0x18 0x88 # CHECK: cmp.saf.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4 +0x46 0x84 0x18 0x89 # CHECK: cmp.sun.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4 0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4 0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8b # CHECK: cmp.ngl.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x8b # CHECK: cmp.ngl.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8c # CHECK: cmp.lt.s $f2, $f3, $f4 
-0x46 0xa4 0x18 0x8c # CHECK: cmp.lt.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8d # CHECK: cmp.nge.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x8d # CHECK: cmp.nge.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8e # CHECK: cmp.le.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x8e # CHECK: cmp.le.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8f # CHECK: cmp.ngt.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x8f # CHECK: cmp.ngt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8b # CHECK: cmp.sueq.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8c # CHECK: cmp.slt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8d # CHECK: cmp.sult.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8e # CHECK: cmp.sle.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8f # CHECK: cmp.sule.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4 0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4 0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4 # 0xf8 0x05 0x01 0x00 # CHECK-TODO: jialc $5, 256 diff --git a/test/MC/Disassembler/Mips/mips64r6.txt b/test/MC/Disassembler/Mips/mips64r6.txt index 3815dc5bc75e..3ddef9ab42ab 100644 --- a/test/MC/Disassembler/Mips/mips64r6.txt +++ b/test/MC/Disassembler/Mips/mips64r6.txt @@ -50,38 +50,38 @@ 0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4 0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4 0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4 -0x46 0x84 0x18 0x80 # CHECK: cmp.f.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x80 # CHECK: cmp.f.d $f2, $f3, $f4 +0x46 0x84 0x18 0x80 # CHECK: cmp.af.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4 0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4 0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4 0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4 0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4 0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4 0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4 -0x46 0x84 0x18 0x84 # CHECK: 
cmp.olt.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x84 # CHECK: cmp.olt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x84 # CHECK: cmp.lt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4 0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4 0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4 -0x46 0x84 0x18 0x86 # CHECK: cmp.ole.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x86 # CHECK: cmp.ole.d $f2, $f3, $f4 +0x46 0x84 0x18 0x86 # CHECK: cmp.le.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4 0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4 0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4 -0x46 0x84 0x18 0x88 # CHECK: cmp.sf.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x88 # CHECK: cmp.sf.d $f2, $f3, $f4 -0x46 0x84 0x18 0x89 # CHECK: cmp.ngle.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x89 # CHECK: cmp.ngle.d $f2, $f3, $f4 +0x46 0x84 0x18 0x88 # CHECK: cmp.saf.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4 +0x46 0x84 0x18 0x89 # CHECK: cmp.sun.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4 0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4 0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8b # CHECK: cmp.ngl.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x8b # CHECK: cmp.ngl.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8c # CHECK: cmp.lt.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x8c # CHECK: cmp.lt.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8d # CHECK: cmp.nge.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x8d # CHECK: cmp.nge.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8e # CHECK: cmp.le.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x8e # CHECK: cmp.le.d $f2, $f3, $f4 -0x46 0x84 0x18 0x8f # CHECK: cmp.ngt.s $f2, $f3, $f4 -0x46 0xa4 0x18 0x8f # CHECK: cmp.ngt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8b # CHECK: cmp.sueq.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8c # CHECK: cmp.slt.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8d # CHECK: cmp.sult.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d 
$f2, $f3, $f4 +0x46 0x84 0x18 0x8e # CHECK: cmp.sle.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4 +0x46 0x84 0x18 0x8f # CHECK: cmp.sule.s $f2, $f3, $f4 +0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4 0x7c 0x43 0x23 0x64 # CHECK: dalign $4, $2, $3, 5 0x74 0x62 0x12 0x34 # CHECK: daui $3, $2, 4660 0x04 0x66 0x56 0x78 # CHECK: dahi $3, 22136 diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s index fac3d62efe2f..f23dbd7302f9 100644 --- a/test/MC/Mips/mips32r6/valid.s +++ b/test/MC/Mips/mips32r6/valid.s @@ -64,38 +64,38 @@ bovc $2, $0, 4 # CHECK: bovc $2, $zero, 4 # encoding: [0x20,0x40,0x00,0x01] bovc $4, $2, 4 # CHECK: bovc $4, $2, 4 # encoding: [0x20,0x82,0x00,0x01] cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0x7c,0xa1,0x04,0x25] - cmp.f.s $f2,$f3,$f4 # CHECK: cmp.f.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x80] - cmp.f.d $f2,$f3,$f4 # CHECK: cmp.f.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x80] + cmp.af.s $f2,$f3,$f4 # CHECK: cmp.af.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x80] + cmp.af.d $f2,$f3,$f4 # CHECK: cmp.af.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x80] cmp.un.s $f2,$f3,$f4 # CHECK: cmp.un.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x81] cmp.un.d $f2,$f3,$f4 # CHECK: cmp.un.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x81] cmp.eq.s $f2,$f3,$f4 # CHECK: cmp.eq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x82] cmp.eq.d $f2,$f3,$f4 # CHECK: cmp.eq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x82] cmp.ueq.s $f2,$f3,$f4 # CHECK: cmp.ueq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x83] cmp.ueq.d $f2,$f3,$f4 # CHECK: cmp.ueq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x83] - cmp.olt.s $f2,$f3,$f4 # CHECK: cmp.olt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x84] - cmp.olt.d $f2,$f3,$f4 # CHECK: cmp.olt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x84] + cmp.lt.s $f2,$f3,$f4 # CHECK: cmp.lt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x84] + cmp.lt.d $f2,$f3,$f4 # CHECK: cmp.lt.d $f2, 
$f3, $f4 # encoding: [0x46,0xa4,0x18,0x84] cmp.ult.s $f2,$f3,$f4 # CHECK: cmp.ult.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x85] cmp.ult.d $f2,$f3,$f4 # CHECK: cmp.ult.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x85] - cmp.ole.s $f2,$f3,$f4 # CHECK: cmp.ole.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x86] - cmp.ole.d $f2,$f3,$f4 # CHECK: cmp.ole.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x86] + cmp.le.s $f2,$f3,$f4 # CHECK: cmp.le.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x86] + cmp.le.d $f2,$f3,$f4 # CHECK: cmp.le.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x86] cmp.ule.s $f2,$f3,$f4 # CHECK: cmp.ule.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x87] cmp.ule.d $f2,$f3,$f4 # CHECK: cmp.ule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x87] - cmp.sf.s $f2,$f3,$f4 # CHECK: cmp.sf.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x88] - cmp.sf.d $f2,$f3,$f4 # CHECK: cmp.sf.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x88] - cmp.ngle.s $f2,$f3,$f4 # CHECK: cmp.ngle.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x89] - cmp.ngle.d $f2,$f3,$f4 # CHECK: cmp.ngle.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x89] + cmp.saf.s $f2,$f3,$f4 # CHECK: cmp.saf.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x88] + cmp.saf.d $f2,$f3,$f4 # CHECK: cmp.saf.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x88] + cmp.sun.s $f2,$f3,$f4 # CHECK: cmp.sun.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x89] + cmp.sun.d $f2,$f3,$f4 # CHECK: cmp.sun.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x89] cmp.seq.s $f2,$f3,$f4 # CHECK: cmp.seq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8a] cmp.seq.d $f2,$f3,$f4 # CHECK: cmp.seq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8a] - cmp.ngl.s $f2,$f3,$f4 # CHECK: cmp.ngl.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8b] - cmp.ngl.d $f2,$f3,$f4 # CHECK: cmp.ngl.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8b] - cmp.lt.s $f2,$f3,$f4 # CHECK: cmp.lt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8c] - cmp.lt.d $f2,$f3,$f4 # CHECK: cmp.lt.d $f2, $f3, $f4 # 
encoding: [0x46,0xa4,0x18,0x8c] - cmp.nge.s $f2,$f3,$f4 # CHECK: cmp.nge.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8d] - cmp.nge.d $f2,$f3,$f4 # CHECK: cmp.nge.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8d] - cmp.le.s $f2,$f3,$f4 # CHECK: cmp.le.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8e] - cmp.le.d $f2,$f3,$f4 # CHECK: cmp.le.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8e] - cmp.ngt.s $f2,$f3,$f4 # CHECK: cmp.ngt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f] - cmp.ngt.d $f2,$f3,$f4 # CHECK: cmp.ngt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f] + cmp.sueq.s $f2,$f3,$f4 # CHECK: cmp.sueq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8b] + cmp.sueq.d $f2,$f3,$f4 # CHECK: cmp.sueq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8b] + cmp.slt.s $f2,$f3,$f4 # CHECK: cmp.slt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8c] + cmp.slt.d $f2,$f3,$f4 # CHECK: cmp.slt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8c] + cmp.sult.s $f2,$f3,$f4 # CHECK: cmp.sult.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8d] + cmp.sult.d $f2,$f3,$f4 # CHECK: cmp.sult.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8d] + cmp.sle.s $f2,$f3,$f4 # CHECK: cmp.sle.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8e] + cmp.sle.d $f2,$f3,$f4 # CHECK: cmp.sle.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8e] + cmp.sule.s $f2,$f3,$f4 # CHECK: cmp.sule.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f] + cmp.sule.d $f2,$f3,$f4 # CHECK: cmp.sule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f] div $2,$3,$4 # CHECK: div $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9a] divu $2,$3,$4 # CHECK: divu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9b] jialc $5, 256 # CHECK: jialc $5, 256 # encoding: [0xf8,0x05,0x01,0x00] diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s index 40fc23f8ceed..34c1dac5be55 100644 --- a/test/MC/Mips/mips64r6/valid.s +++ b/test/MC/Mips/mips64r6/valid.s @@ -64,38 +64,38 @@ bovc $2, $0, 4 # CHECK: bovc $2, $zero, 4 # encoding: [0x20,0x40,0x00,0x01] bovc 
$4, $2, 4 # CHECK: bovc $4, $2, 4 # encoding: [0x20,0x82,0x00,0x01] cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0x7c,0xa1,0x04,0x25] - cmp.f.s $f2,$f3,$f4 # CHECK: cmp.f.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x80] - cmp.f.d $f2,$f3,$f4 # CHECK: cmp.f.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x80] + cmp.af.s $f2,$f3,$f4 # CHECK: cmp.af.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x80] + cmp.af.d $f2,$f3,$f4 # CHECK: cmp.af.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x80] cmp.un.s $f2,$f3,$f4 # CHECK: cmp.un.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x81] cmp.un.d $f2,$f3,$f4 # CHECK: cmp.un.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x81] cmp.eq.s $f2,$f3,$f4 # CHECK: cmp.eq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x82] cmp.eq.d $f2,$f3,$f4 # CHECK: cmp.eq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x82] cmp.ueq.s $f2,$f3,$f4 # CHECK: cmp.ueq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x83] cmp.ueq.d $f2,$f3,$f4 # CHECK: cmp.ueq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x83] - cmp.olt.s $f2,$f3,$f4 # CHECK: cmp.olt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x84] - cmp.olt.d $f2,$f3,$f4 # CHECK: cmp.olt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x84] + cmp.lt.s $f2,$f3,$f4 # CHECK: cmp.lt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x84] + cmp.lt.d $f2,$f3,$f4 # CHECK: cmp.lt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x84] cmp.ult.s $f2,$f3,$f4 # CHECK: cmp.ult.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x85] cmp.ult.d $f2,$f3,$f4 # CHECK: cmp.ult.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x85] - cmp.ole.s $f2,$f3,$f4 # CHECK: cmp.ole.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x86] - cmp.ole.d $f2,$f3,$f4 # CHECK: cmp.ole.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x86] + cmp.le.s $f2,$f3,$f4 # CHECK: cmp.le.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x86] + cmp.le.d $f2,$f3,$f4 # CHECK: cmp.le.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x86] cmp.ule.s $f2,$f3,$f4 # CHECK: cmp.ule.s $f2, $f3, $f4 # encoding: 
[0x46,0x84,0x18,0x87] cmp.ule.d $f2,$f3,$f4 # CHECK: cmp.ule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x87] - cmp.sf.s $f2,$f3,$f4 # CHECK: cmp.sf.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x88] - cmp.sf.d $f2,$f3,$f4 # CHECK: cmp.sf.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x88] - cmp.ngle.s $f2,$f3,$f4 # CHECK: cmp.ngle.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x89] - cmp.ngle.d $f2,$f3,$f4 # CHECK: cmp.ngle.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x89] + cmp.saf.s $f2,$f3,$f4 # CHECK: cmp.saf.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x88] + cmp.saf.d $f2,$f3,$f4 # CHECK: cmp.saf.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x88] + cmp.sun.s $f2,$f3,$f4 # CHECK: cmp.sun.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x89] + cmp.sun.d $f2,$f3,$f4 # CHECK: cmp.sun.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x89] cmp.seq.s $f2,$f3,$f4 # CHECK: cmp.seq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8a] cmp.seq.d $f2,$f3,$f4 # CHECK: cmp.seq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8a] - cmp.ngl.s $f2,$f3,$f4 # CHECK: cmp.ngl.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8b] - cmp.ngl.d $f2,$f3,$f4 # CHECK: cmp.ngl.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8b] - cmp.lt.s $f2,$f3,$f4 # CHECK: cmp.lt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8c] - cmp.lt.d $f2,$f3,$f4 # CHECK: cmp.lt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8c] - cmp.nge.s $f2,$f3,$f4 # CHECK: cmp.nge.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8d] - cmp.nge.d $f2,$f3,$f4 # CHECK: cmp.nge.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8d] - cmp.le.s $f2,$f3,$f4 # CHECK: cmp.le.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8e] - cmp.le.d $f2,$f3,$f4 # CHECK: cmp.le.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8e] - cmp.ngt.s $f2,$f3,$f4 # CHECK: cmp.ngt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f] - cmp.ngt.d $f2,$f3,$f4 # CHECK: cmp.ngt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f] + cmp.sueq.s $f2,$f3,$f4 # CHECK: cmp.sueq.s $f2, $f3, $f4 # encoding: 
[0x46,0x84,0x18,0x8b] + cmp.sueq.d $f2,$f3,$f4 # CHECK: cmp.sueq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8b] + cmp.slt.s $f2,$f3,$f4 # CHECK: cmp.slt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8c] + cmp.slt.d $f2,$f3,$f4 # CHECK: cmp.slt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8c] + cmp.sult.s $f2,$f3,$f4 # CHECK: cmp.sult.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8d] + cmp.sult.d $f2,$f3,$f4 # CHECK: cmp.sult.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8d] + cmp.sle.s $f2,$f3,$f4 # CHECK: cmp.sle.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8e] + cmp.sle.d $f2,$f3,$f4 # CHECK: cmp.sle.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8e] + cmp.sule.s $f2,$f3,$f4 # CHECK: cmp.sule.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f] + cmp.sule.d $f2,$f3,$f4 # CHECK: cmp.sule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f] dalign $4,$2,$3,5 # CHECK: dalign $4, $2, $3, 5 # encoding: [0x7c,0x43,0x23,0x64] daui $3,$2,0x1234 # CHECK: daui $3, $2, 4660 # encoding: [0x74,0x62,0x12,0x34] dahi $3,0x5678 # CHECK: dahi $3, 22136 # encoding: [0x04,0x66,0x56,0x78] From 1285b3130b103711fb6e65beeb5d4bcd1a26b433 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 9 Jul 2014 10:47:26 +0000 Subject: [PATCH 0697/1155] [mips][mips64r6] Correct select patterns that have the condition or true/false values backwards Summary: This bug caused SingleSource/Regression/C/uint64_to_float and SingleSource/UnitTests/2002-05-02-CastTest3 to fail (among others). 
Differential Revision: http://reviews.llvm.org/D4388 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212608 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips32r6InstrInfo.td | 16 ++--- lib/Target/Mips/Mips64r6InstrInfo.td | 36 +++++------ test/CodeGen/Mips/cmov.ll | 92 ++++++++++++++-------------- test/CodeGen/Mips/countleading.ll | 6 +- test/CodeGen/Mips/mips64-f128.ll | 12 ++-- test/CodeGen/Mips/select.ll | 48 +++++++-------- 6 files changed, 105 insertions(+), 105 deletions(-) diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index bf60685e6377..d06e5ca22584 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -786,14 +786,14 @@ def : MipsPat<(select i32:$cond, i32:$t, i32:$f), (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, ISA_MIPS32R6; def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, + (OR (SELEQZ i32:$t, i32:$cond), (SELNEZ i32:$f, i32:$cond))>, ISA_MIPS32R6; def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, i32:$f), - (OR (SELNEZ i32:$f, i32:$cond), (SELEQZ i32:$t, i32:$cond))>, + (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, ISA_MIPS32R6; def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), - (SELEQZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, + (OR (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), + (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, ISA_MIPS32R6; def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), (OR (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)), @@ -801,13 +801,13 @@ def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), ISA_MIPS32R6; def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, (SLTi i32:$cond, (Plus1 
imm:$imm))), - (SELEQZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>, + (OR (SELEQZ i32:$t, (SLTi i32:$cond, (Plus1 imm:$imm))), + (SELNEZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>, ISA_MIPS32R6; def : MipsPat<(select (i32 (setugt i32:$cond, immSExt16Plus1:$imm)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))), - (SELEQZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>, + (OR (SELEQZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))), + (SELNEZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>, ISA_MIPS32R6; def : MipsPat<(select i32:$cond, i32:$t, immz), diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index 5452175f2586..63cf60b44cdc 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -125,36 +125,36 @@ def : MipsPat<(select i64:$cond, i64:$t, i64:$f), (SELEQZ64 i64:$f, i64:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i64:$cond, immz)), i64:$t, i64:$f), - (OR64 (SELNEZ64 i64:$t, i64:$cond), - (SELEQZ64 i64:$f, i64:$cond))>, + (OR64 (SELEQZ64 i64:$t, i64:$cond), + (SELNEZ64 i64:$f, i64:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i64:$cond, immz)), i64:$t, i64:$f), - (OR64 (SELNEZ64 i64:$f, i64:$cond), - (SELEQZ64 i64:$t, i64:$cond))>, + (OR64 (SELNEZ64 i64:$t, i64:$cond), + (SELEQZ64 i64:$f, i64:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f), - (OR64 (SELNEZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)), - (SELEQZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>, + (OR64 (SELEQZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)), + (SELNEZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f), - (OR64 (SELNEZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)), - (SELEQZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)))>, + (OR64 (SELNEZ64 i64:$t, (XORi64 i64:$cond, 
immZExt16_64:$imm)), + (SELEQZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>, ISA_MIPS64R6; def : MipsPat< (select (i32 (setgt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f), - (OR64 (SELNEZ64 i64:$t, + (OR64 (SELEQZ64 i64:$t, (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)), sub_32)), - (SELEQZ64 i64:$f, + (SELNEZ64 i64:$f, (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)), sub_32)))>, ISA_MIPS64R6; def : MipsPat< (select (i32 (setugt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f), - (OR64 (SELNEZ64 i64:$t, + (OR64 (SELEQZ64 i64:$t, (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)), sub_32)), - (SELEQZ64 i64:$f, + (SELNEZ64 i64:$f, (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)), sub_32)))>, ISA_MIPS64R6; @@ -177,17 +177,17 @@ def : MipsPat<(select i32:$cond, i64:$t, i64:$f), (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i64:$t, i64:$f), - (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)), - (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>, + (OR64 (SELEQZ64 i64:$t, (SLL64_32 i32:$cond)), + (SELNEZ64 i64:$f, (SLL64_32 i32:$cond)))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i32:$cond, immz)), i64:$t, i64:$f), - (OR64 (SELNEZ64 i64:$f, (SLL64_32 i32:$cond)), - (SELEQZ64 i64:$t, (SLL64_32 i32:$cond)))>, + (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)), + (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i64:$t, i64:$f), - (OR64 (SELNEZ64 i64:$t, (SLL64_32 (XORi i32:$cond, + (OR64 (SELEQZ64 i64:$t, (SLL64_32 (XORi i32:$cond, immZExt16:$imm))), - (SELEQZ64 i64:$f, (SLL64_32 (XORi i32:$cond, + (SELNEZ64 i64:$f, (SLL64_32 (XORi i32:$cond, immZExt16:$imm))))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i64:$t, i64:$f), diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll index 61f398314de1..999bdb4d4938 100644 --- 
a/test/CodeGen/Mips/cmov.ll +++ b/test/CodeGen/Mips/cmov.ll @@ -18,9 +18,9 @@ ; 32-CMP-DAG: lw $[[R0:[0-9]+]], %got(i3) ; 32-CMP-DAG: addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1) -; 32-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[R1]], $4 -; 32-CMP-DAG: selnez $[[T1:[0-9]+]], $[[R0]], $4 -; 32-CMP-DAG: or $[[T2:[0-9]+]], $[[T1]], $[[T0]] +; 32-CMP-DAG: selnez $[[T0:[0-9]+]], $[[R1]], $4 +; 32-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[R0]], $4 +; 32-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]] ; 32-CMP-DAG: lw $2, 0($[[T2]]) ; 64-CMOV-DAG: ldr $[[R0:[0-9]+]] @@ -33,9 +33,9 @@ ; (setcc's result is i32 so bits 32-63 are undefined). It's not really ; needed. ; 64-CMP-DAG: sll $[[CC:[0-9]+]], $4, 0 -; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[R1]], $[[CC]] -; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[R0]], $[[CC]] -; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T1]], $[[T0]] +; 64-CMP-DAG: selnez $[[T0:[0-9]+]], $[[R1]], $[[CC]] +; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[R0]], $[[CC]] +; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]] ; 64-CMP-DAG: ld $2, 0($[[T2]]) define i32* @cmov1(i32 %s) nounwind readonly { @@ -58,9 +58,9 @@ entry: ; 32-CMP-DAG: addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(d) ; 32-CMP-DAG: addiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got(c) -; 32-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[R0]], $4 -; 32-CMP-DAG: selnez $[[T1:[0-9]+]], $[[R1]], $4 -; 32-CMP-DAG: or $[[T2:[0-9]+]], $[[T1]], $[[T0]] +; 32-CMP-DAG: selnez $[[T0:[0-9]+]], $[[R0]], $4 +; 32-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[R1]], $4 +; 32-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]] ; 32-CMP-DAG: lw $2, 0($[[T2]]) ; 64-CMOV: daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d) @@ -73,9 +73,9 @@ entry: ; (setcc's result is i32 so bits 32-63 are undefined). It's not really ; needed. 
; 64-CMP-DAG: sll $[[CC:[0-9]+]], $4, 0 -; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[R0]], $[[CC]] -; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[R1]], $[[CC]] -; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T1]], $[[T0]] +; 64-CMP-DAG: selnez $[[T0:[0-9]+]], $[[R0]], $[[CC]] +; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[R1]], $[[CC]] +; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]] ; 64-CMP-DAG: lw $2, 0($[[T2]]) define i32 @cmov2(i32 %s) nounwind readonly { @@ -97,16 +97,16 @@ entry: ; 32-CMOV: movz ${{[26]}}, $5, $[[R0]] ; 32-CMP-DAG: xori $[[CC:[0-9]+]], $4, 234 -; 32-CMP-DAG: selnez $[[T0:[0-9]+]], $5, $[[CC]] -; 32-CMP-DAG: seleqz $[[T1:[0-9]+]], $6, $[[CC]] +; 32-CMP-DAG: seleqz $[[T0:[0-9]+]], $5, $[[CC]] +; 32-CMP-DAG: selnez $[[T1:[0-9]+]], $6, $[[CC]] ; 32-CMP-DAG: or $2, $[[T0]], $[[T1]] ; 64-CMOV: xori $[[R0:[0-9]+]], $4, 234 ; 64-CMOV: movz ${{[26]}}, $5, $[[R0]] ; 64-CMP-DAG: xori $[[CC:[0-9]+]], $4, 234 -; 64-CMP-DAG: selnez $[[T0:[0-9]+]], $5, $[[CC]] -; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $6, $[[CC]] +; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $5, $[[CC]] +; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $6, $[[CC]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone { @@ -131,10 +131,10 @@ entry: ; 32-CMP-DAG: xori $[[R0:[0-9]+]], $4, 234 ; 32-CMP-DAG: lw $[[R1:[0-9]+]], 16($sp) ; 32-CMP-DAG: lw $[[R2:[0-9]+]], 20($sp) -; 32-CMP-DAG: selnez $[[T0:[0-9]+]], $6, $[[R0]] -; 32-CMP-DAG: selnez $[[T1:[0-9]+]], $7, $[[R0]] -; 32-CMP-DAG: seleqz $[[T2:[0-9]+]], $[[R1]], $[[R0]] -; 32-CMP-DAG: seleqz $[[T3:[0-9]+]], $[[R2]], $[[R0]] +; 32-CMP-DAG: seleqz $[[T0:[0-9]+]], $6, $[[R0]] +; 32-CMP-DAG: seleqz $[[T1:[0-9]+]], $7, $[[R0]] +; 32-CMP-DAG: selnez $[[T2:[0-9]+]], $[[R1]], $[[R0]] +; 32-CMP-DAG: selnez $[[T3:[0-9]+]], $[[R2]], $[[R0]] ; 32-CMP-DAG: or $2, $[[T0]], $[[T2]] ; 32-CMP-DAG: or $3, $[[T1]], $[[T3]] @@ -142,8 +142,8 @@ entry: ; 64-CMOV: movz ${{[26]}}, $5, $[[R0]] ; 64-CMP-DAG: xori $[[R0:[0-9]+]], $4, 234 -; 64-CMP-DAG: selnez 
$[[T0:[0-9]+]], $5, $[[R0]] -; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $6, $[[R0]] +; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $5, $[[R0]] +; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $6, $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone { @@ -172,8 +172,8 @@ entry: ; 32-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 32-CMP-DAG: slti $[[R0:[0-9]+]], $4, 32767 ; FIXME: We can do better than this by using selccz to choose between +0 and +2 -; 32-CMP-DAG: selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]] -; 32-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] +; 32-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]] +; 32-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 32-CMP-DAG: or $2, $[[T0]], $[[T1]] ; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3 @@ -185,8 +185,8 @@ entry: ; 64-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 64-CMP-DAG: slti $[[R0:[0-9]+]], $4, 32767 ; FIXME: We can do better than this by using selccz to choose between +0 and +2 -; 64-CMP-DAG: selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]] -; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] +; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]] +; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] define i32 @slti0(i32 %a) { @@ -246,8 +246,8 @@ entry: ; 32-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 32-CMP-DAG: slti $[[R0:[0-9]+]], $4, -32768 ; FIXME: We can do better than this by using selccz to choose between +0 and +2 -; 32-CMP-DAG: selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]] -; 32-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] +; 32-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]] +; 32-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 32-CMP-DAG: or $2, $[[T0]], $[[T1]] ; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3 @@ -259,8 +259,8 @@ entry: ; 64-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 64-CMP-DAG: slti $[[R0:[0-9]+]], $4, -32768 ; FIXME: We can do better than this by using selccz to choose between +0 and +2 -; 64-CMP-DAG: selnez $[[T0:[0-9]+]], 
$[[I3]], $[[R0]] -; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] +; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]] +; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] define i32 @slti2(i32 %a) { @@ -329,8 +329,8 @@ entry: ; 32-CMP-DAG: slt $[[CC0:[0-9]+]], $zero, $4 ; 32-CMP-DAG: addiu $[[I32766:[0-9]+]], $zero, 32766 ; 32-CMP-DAG: sltu $[[CC1:[0-9]+]], $[[I32766]], $5 -; 32-CMP-DAG: seleqz $[[CC2:[0-9]+]], $[[CC0]], $4 -; 32-CMP-DAG: selnez $[[CC3:[0-9]+]], $[[CC1]], $4 +; 32-CMP-DAG: selnez $[[CC2:[0-9]+]], $[[CC0]], $4 +; 32-CMP-DAG: seleqz $[[CC3:[0-9]+]], $[[CC1]], $4 ; 32-CMP: or $[[CC:[0-9]+]], $[[CC3]], $[[CC2]] ; 32-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 32-CMP-DAG: addiu $[[I4:[0-9]+]], $zero, 4 @@ -349,8 +349,8 @@ entry: ; 64-CMP-DAG: slti $[[R0:[0-9]+]], $4, 32767 ; FIXME: We can do better than this by adding/subtracting the result of slti ; to/from one of the constants. -; 64-CMP-DAG: selnez $[[T0:[0-9]+]], $[[I5]], $[[R0]] -; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I4]], $[[R0]] +; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I5]], $[[R0]] +; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I4]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] define i64 @slti64_0(i64 %a) { @@ -374,8 +374,8 @@ entry: ; 32-CMP-DAG: slt $[[CC0:[0-9]+]], $zero, $4 ; 32-CMP-DAG: addiu $[[I32766:[0-9]+]], $zero, 32767 ; 32-CMP-DAG: sltu $[[CC1:[0-9]+]], $[[I32766]], $5 -; 32-CMP-DAG: seleqz $[[CC2:[0-9]+]], $[[CC0]], $4 -; 32-CMP-DAG: selnez $[[CC3:[0-9]+]], $[[CC1]], $4 +; 32-CMP-DAG: selnez $[[CC2:[0-9]+]], $[[CC0]], $4 +; 32-CMP-DAG: seleqz $[[CC3:[0-9]+]], $[[CC1]], $4 ; 32-CMP: or $[[CC:[0-9]+]], $[[CC3]], $[[CC2]] ; 32-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 32-CMP-DAG: addiu $[[I4:[0-9]+]], $zero, 4 @@ -423,8 +423,8 @@ entry: ; 64-CMP-DAG: slti $[[R0:[0-9]+]], $4, -32768 ; FIXME: We can do better than this by adding/subtracting the result of slti ; to/from one of the constants. 
-; 64-CMP-DAG: selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]] -; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I4]], $[[R0]] +; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]] +; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I4]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] define i64 @slti64_2(i64 %a) { @@ -476,8 +476,8 @@ entry: ; 32-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 32-CMP-DAG: sltiu $[[R0:[0-9]+]], $4, 32767 ; FIXME: We can do better than this by using selccz to choose between +0 and +2 -; 32-CMP-DAG: selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]] -; 32-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] +; 32-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]] +; 32-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 32-CMP-DAG: or $2, $[[T0]], $[[T1]] ; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3 @@ -489,8 +489,8 @@ entry: ; 64-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 64-CMP-DAG: sltiu $[[R0:[0-9]+]], $4, 32767 ; FIXME: We can do better than this by using selccz to choose between +0 and +2 -; 64-CMP-DAG: selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]] -; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] +; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]] +; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] define i32 @sltiu0(i32 %a) { @@ -550,8 +550,8 @@ entry: ; 32-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 32-CMP-DAG: sltiu $[[R0:[0-9]+]], $4, -32768 ; FIXME: We can do better than this by using selccz to choose between +0 and +2 -; 32-CMP-DAG: selnez $[[T0:[0-9]+]], $[[I3]], $[[R0]] -; 32-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] +; 32-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]] +; 32-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 32-CMP-DAG: or $2, $[[T0]], $[[T1]] ; 64-CMOV-DAG: addiu $[[I3:[0-9]+]], $zero, 3 @@ -563,8 +563,8 @@ entry: ; 64-CMP-DAG: addiu $[[I5:[0-9]+]], $zero, 5 ; 64-CMP-DAG: sltiu $[[R0:[0-9]+]], $4, -32768 ; FIXME: We can do better than this by using selccz to choose between +0 and +2 -; 64-CMP-DAG: selnez 
$[[T0:[0-9]+]], $[[I3]], $[[R0]] -; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]] +; 64-CMP-DAG: seleqz $[[T0:[0-9]+]], $[[I3]], $[[R0]] +; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]] ; 64-CMP-DAG: or $2, $[[T0]], $[[T1]] define i32 @sltiu2(i32 %a) { diff --git a/test/CodeGen/Mips/countleading.ll b/test/CodeGen/Mips/countleading.ll index 81fb2b44942b..6e63cff123cf 100644 --- a/test/CodeGen/Mips/countleading.ll +++ b/test/CodeGen/Mips/countleading.ll @@ -52,9 +52,9 @@ entry: ; MIPS32-GT-R1-DAG: clz $[[R1:[0-9]+]], $5 ; MIPS32-GT-R1-DAG: addiu $[[R2:2+]], $[[R0]], 32 ; MIPS32-R1-R2-DAG: movn $[[R2]], $[[R1]], $5 -; MIPS32-R6-DAG: selnez $[[R5:[0-9]+]], $[[R2]], $5 -; MIPS32-R6-DAG: seleqz $[[R6:[0-9]+]], $[[R1]], $5 -; MIPS32-R6-DAG: or $2, $[[R5]], $[[R6]] +; MIPS32-R6-DAG: seleqz $[[R5:[0-9]+]], $[[R2]], $5 +; MIPS32-R6-DAG: selnez $[[R6:[0-9]+]], $[[R1]], $5 +; MIPS32-R6-DAG: or $2, $[[R6]], $[[R5]] ; MIPS32-GT-R1-DAG: addiu $3, $zero, 0 ; MIPS64-GT-R1: dclz $2, $4 diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll index d523f253bf39..7f7d515d690e 100644 --- a/test/CodeGen/Mips/mips64-f128.ll +++ b/test/CodeGen/Mips/mips64-f128.ll @@ -628,12 +628,12 @@ entry: ; (setcc's result is i32 so bits 32-63 are undefined). It's not really ; needed. 
; CMP_CC_FMT-DAG: sll $[[CC:[0-9]+]], $4, 0 -; CMP_CC_FMT-DAG: selnez $[[EQ1:[0-9]+]], $8, $[[CC]] -; CMP_CC_FMT-DAG: seleqz $[[NE1:[0-9]+]], $6, $[[CC]] -; CMP_CC_FMT-DAG: or $2, $[[EQ1]], $[[NE1]] -; CMP_CC_FMT-DAG: selnez $[[EQ2:[0-9]+]], $9, $[[CC]] -; CMP_CC_FMT-DAG: seleqz $[[NE2:[0-9]+]], $7, $[[CC]] -; CMP_CC_FMT-DAG: or $4, $[[EQ2]], $[[NE2]] +; CMP_CC_FMT-DAG: seleqz $[[EQ1:[0-9]+]], $8, $[[CC]] +; CMP_CC_FMT-DAG: selnez $[[NE1:[0-9]+]], $6, $[[CC]] +; CMP_CC_FMT-DAG: or $2, $[[NE1]], $[[EQ1]] +; CMP_CC_FMT-DAG: seleqz $[[EQ2:[0-9]+]], $9, $[[CC]] +; CMP_CC_FMT-DAG: selnez $[[NE2:[0-9]+]], $7, $[[CC]] +; CMP_CC_FMT-DAG: or $4, $[[NE2]], $[[EQ2]] define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) { entry: diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll index d4a75bc42ffb..91471a2ce6bb 100644 --- a/test/CodeGen/Mips/select.ll +++ b/test/CodeGen/Mips/select.ll @@ -18,9 +18,9 @@ entry: ; 32R2: movn $5, $6, $4 ; 32R2: move $2, $5 -; 32R6-DAG: selnez $[[T0:[0-9]+]], $5, $4 -; 32R6-DAG: seleqz $[[T1:[0-9]+]], $6, $4 -; 32R6: or $2, $[[T0]], $[[T1]] +; 32R6-DAG: seleqz $[[T0:[0-9]+]], $5, $4 +; 32R6-DAG: selnez $[[T1:[0-9]+]], $6, $4 +; 32R6: or $2, $[[T1]], $[[T0]] ; 64: movn $5, $6, $4 ; 64: move $2, $5 @@ -28,9 +28,9 @@ entry: ; 64R2: movn $5, $6, $4 ; 64R2: move $2, $5 -; 64R6-DAG: selnez $[[T0:[0-9]+]], $5, $4 -; 64R6-DAG: seleqz $[[T1:[0-9]+]], $6, $4 -; 64R6: or $2, $[[T0]], $[[T1]] +; 64R6-DAG: seleqz $[[T0:[0-9]+]], $5, $4 +; 64R6-DAG: selnez $[[T1:[0-9]+]], $6, $4 +; 64R6: or $2, $[[T1]], $[[T0]] %tobool = icmp ne i32 %s, 0 %cond = select i1 %tobool, i32 %f1, i32 %f0 @@ -56,13 +56,13 @@ entry: ; 32R2: move $3, $7 ; 32R6-DAG: lw $[[F1:[0-9]+]], 16($sp) -; 32R6-DAG: selnez $[[T0:[0-9]+]], $6, $4 -; 32R6-DAG: seleqz $[[T1:[0-9]+]], $[[F1]], $4 -; 32R6: or $2, $[[T0]], $[[T1]] +; 32R6-DAG: seleqz $[[T0:[0-9]+]], $6, $4 +; 32R6-DAG: selnez $[[T1:[0-9]+]], $[[F1]], $4 +; 32R6: or $2, $[[T1]], $[[T0]] ; 32R6-DAG: lw 
$[[F1H:[0-9]+]], 20($sp) -; 32R6-DAG: selnez $[[T0:[0-9]+]], $7, $4 -; 32R6-DAG: seleqz $[[T1:[0-9]+]], $[[F1H]], $4 -; 32R6: or $3, $[[T0]], $[[T1]] +; 32R6-DAG: seleqz $[[T0:[0-9]+]], $7, $4 +; 32R6-DAG: selnez $[[T1:[0-9]+]], $[[F1H]], $4 +; 32R6: or $3, $[[T1]], $[[T0]] ; 64: movn $5, $6, $4 ; 64: move $2, $5 @@ -74,9 +74,9 @@ entry: ; (setcc's result is i32 so bits 32-63 are undefined). It's not really ; needed. ; 64R6-DAG: sll $[[CC:[0-9]+]], $4, 0 -; 64R6-DAG: selnez $[[T0:[0-9]+]], $5, $[[CC]] -; 64R6-DAG: seleqz $[[T1:[0-9]+]], $6, $[[CC]] -; 64R6: or $2, $[[T0]], $[[T1]] +; 64R6-DAG: seleqz $[[T0:[0-9]+]], $5, $[[CC]] +; 64R6-DAG: selnez $[[T1:[0-9]+]], $6, $[[CC]] +; 64R6: or $2, $[[T1]], $[[T0]] %tobool = icmp ne i32 %s, 0 %cond = select i1 %tobool, i64 %f1, i64 %f0 @@ -105,13 +105,13 @@ entry: ; 32R6-DAG: lw $[[F1:[0-9]+]], 16($sp) ; 32R6-DAG: or $[[T2:[0-9]+]], $4, $5 -; 32R6-DAG: selnez $[[T0:[0-9]+]], $6, $[[T2]] -; 32R6-DAG: seleqz $[[T1:[0-9]+]], $[[F1]], $[[T2]] -; 32R6: or $2, $[[T0]], $[[T1]] +; 32R6-DAG: seleqz $[[T0:[0-9]+]], $6, $[[T2]] +; 32R6-DAG: selnez $[[T1:[0-9]+]], $[[F1]], $[[T2]] +; 32R6: or $2, $[[T1]], $[[T0]] ; 32R6-DAG: lw $[[F1H:[0-9]+]], 20($sp) -; 32R6-DAG: selnez $[[T0:[0-9]+]], $7, $[[T2]] -; 32R6-DAG: seleqz $[[T1:[0-9]+]], $[[F1H]], $[[T2]] -; 32R6: or $3, $[[T0]], $[[T1]] +; 32R6-DAG: seleqz $[[T0:[0-9]+]], $7, $[[T2]] +; 32R6-DAG: selnez $[[T1:[0-9]+]], $[[F1H]], $[[T2]] +; 32R6: or $3, $[[T1]], $[[T0]] ; 64: movn $5, $6, $4 ; 64: move $2, $5 @@ -119,9 +119,9 @@ entry: ; 64R2: movn $5, $6, $4 ; 64R2: move $2, $5 -; 64R6-DAG: selnez $[[T0:[0-9]+]], $5, $4 -; 64R6-DAG: seleqz $[[T1:[0-9]+]], $6, $4 -; 64R6: or $2, $[[T0]], $[[T1]] +; 64R6-DAG: seleqz $[[T0:[0-9]+]], $5, $4 +; 64R6-DAG: selnez $[[T1:[0-9]+]], $6, $4 +; 64R6: or $2, $[[T1]], $[[T0]] %tobool = icmp ne i64 %s, 0 %cond = select i1 %tobool, i64 %f1, i64 %f0 From ce184e95f9d98e90184c2cd327acb157a231af60 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 
9 Jul 2014 10:58:18 +0000 Subject: [PATCH 0698/1155] [x86] Add a ZERO_EXTEND_VECTOR_INREG DAG node and use it when widening vector types to be legal and a ZERO_EXTEND node is encountered. When we use widening to legalize vector types, extend nodes are a real challenge. Either the input or output is likely to be legal, but in many cases not both. As a consequence, we don't really have any way to represent this situation and the prior code in the widening legalization framework would just scalarize the extend operation completely. This patch introduces a new DAG node to represent doing a zero extend of a vector "in register". The core of the idea is to allow legal but different vector types in the input and output. The output vector must have fewer lanes but wider elements. The operation is defined to zero extend the low elements of the input to the size of the output elements, and drop all of the high elements which don't have a corresponding lane in the output vector. It also includes generic expansion of this node in terms of blending a zero vector into the high elements of the vector and bitcasting across. This in turn yields extremely nice code for x86 SSE2 when we use the new widening legalization logic in conjunction with the new shuffle lowering logic. There is still more to do here. We need to support sign extension, any extension, and potentially int-to-float conversions. My current plan is to continue using similar synthetic nodes to model each of these transitions with generic lowering code for each one. However, with this patch LLVM already reaches performance parity with GCC for the core C loops of the x264 code (assuming you disable the hand-written assembly versions) when compiling for SSE2 and SSE3 architectures and enabling the new widening and lowering logic for vectors. 
Differential Revision: http://reviews.llvm.org/D4405 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212610 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ISDOpcodes.h | 9 ++++ include/llvm/CodeGen/SelectionDAG.h | 6 +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorOps.cpp | 42 +++++++++++++++++++ .../SelectionDAG/LegalizeVectorTypes.cpp | 22 +++++++++- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7 ++++ .../SelectionDAG/SelectionDAGDumper.cpp | 1 + lib/Target/X86/X86ISelLowering.cpp | 1 + test/CodeGen/X86/widen_conversions.ll | 18 ++++++++ 9 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/X86/widen_conversions.ll diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 80fb8b2d3a5d..fb138f629aa5 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -379,6 +379,15 @@ namespace ISD { /// operand, a ValueType node. SIGN_EXTEND_INREG, + /// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an + /// in-register zero-extension of the low lanes of an integer vector. The + /// result type must have fewer elements than the operand type, and those + /// elements must be larger integer types such that the total size of the + /// operand type and the result type match. Each of the low operand + /// elements is zero-extended into the corresponding, wider result + /// elements. + ZERO_EXTEND_VECTOR_INREG, + /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned /// integer. FP_TO_SINT, diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index db2e841e1720..c2ae553c778d 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -562,6 +562,12 @@ class SelectionDAG { /// value assuming it was the smaller SrcTy value. 
SDValue getZeroExtendInReg(SDValue Op, SDLoc DL, EVT SrcTy); + /// getZeroExtendVectorInReg - Return an operation which will zero extend the + /// low lanes of the operand into the specified vector type. For example, + /// this can convert a v16i8 into a v4i32 by zero extending the low four + /// lanes of the operand from i8 to i32. + SDValue getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT); + /// getBoolExtOrTrunc - Convert Op, which must be of integer type, to the /// integer type VT, by using an extension appropriate for the target's /// BooleanContent or truncating it. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 6ef2d7f0bc93..a6bbc218dd22 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -649,6 +649,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_SETCC(SDNode* N); + SDValue WidenVecOp_ZERO_EXTEND(SDNode *N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 2c7767469821..3fa64843a93a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -75,6 +75,12 @@ class VectorLegalizer { /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); + /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and blends zeros into + /// the remaining lanes, finally bitcasting to the proper type. + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); + /// \brief Expand bswap of vectors into a shuffle if legal. 
SDValue ExpandBSWAP(SDValue Op); @@ -274,6 +280,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_EXTEND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -614,6 +621,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) { switch (Op->getOpcode()) { case ISD::SIGN_EXTEND_INREG: return ExpandSEXTINREG(Op); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ExpandZERO_EXTEND_VECTOR_INREG(Op); case ISD::BSWAP: return ExpandBSWAP(Op); case ISD::VSELECT: @@ -708,6 +717,39 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } +// Generically expand a vector zext in register to a shuffle of the relevant +// lanes into the appropriate locations, a blend of zero into the high bits, +// and a bitcast to the wider element type. +SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build up a zero vector to blend into this one. + EVT SrcScalarVT = SrcVT.getScalarType(); + SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT); + SmallVector BuildVectorOperands(NumSrcElements, ScalarZero); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); + + // Shuffle the incoming lanes into the correct position, and pull all other + // lanes from the zero vector. + SmallVector ShuffleMask; + ShuffleMask.reserve(NumSrcElements); + for (int i = 0; i < NumSrcElements; ++i) + ShuffleMask.push_back(i); + + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? 
ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); +} + SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { EVT VT = Op.getValueType(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 74f8f72c7ab2..dc61577d6a3b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2380,6 +2380,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::ZERO_EXTEND: Res = WidenVecOp_ZERO_EXTEND(N); break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: @@ -2388,7 +2389,6 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = WidenVecOp_Convert(N); break; @@ -2410,6 +2410,26 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { return false; } +SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue InOp = N->getOperand(0); + // If some legalization strategy other than widening is used on the operand, + // we can't safely assume that just zero-extending the low lanes is the + // correct transformation. 
+ if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) + return WidenVecOp_Convert(N); + InOp = GetWidenedVector(InOp); + EVT InVT = InOp.getValueType(); + assert(NumElts < InVT.getVectorNumElements() && "Input wasn't widened!"); + + // Use a special DAG node to represent the operation of zero extending the + // low lanes. + return DAG.getZeroExtendVectorInReg(InOp, DL, VT); +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c2cb0d8d7799..9b65d90383b8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1032,6 +1032,13 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { getConstant(Imm, Op.getValueType())); } +SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 
/// SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c92fb2453c2a..63746e1cdc08 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -221,6 +221,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ZERO_EXTEND: return "zero_extend"; case ISD::ANY_EXTEND: return "any_extend"; case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 188a0befcbdb..70d26ee1561a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -869,6 +869,7 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::TRUNCATE, VT, Expand); setOperationAction(ISD::SIGN_EXTEND, VT, Expand); setOperationAction(ISD::ZERO_EXTEND, VT, Expand); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ANY_EXTEND, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); diff --git a/test/CodeGen/X86/widen_conversions.ll b/test/CodeGen/X86/widen_conversions.ll new file mode 100644 index 000000000000..522ab475c2a0 --- /dev/null +++ b/test/CodeGen/X86/widen_conversions.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mcpu=x86-64 -x86-experimental-vector-widening-legalization -x86-experimental-vector-shuffle-lowering | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown" + +define <4 x i32> @zext_v4i8_to_v4i32(<4 x i8>* %ptr) { +; CHECK-LABEL: zext_v4i8_to_v4i32: +; +; CHECK: movd (%{{.*}}), %[[X:xmm[0-9]+]] +; CHECK-NEXT: 
pxor %[[Z:xmm[0-9]+]], %[[Z]] +; CHECK-NEXT: punpcklbw %[[Z]], %[[X]] +; CHECK-NEXT: punpcklbw %[[Z]], %[[X]] +; CHECK-NEXT: ret + + %val = load <4 x i8>* %ptr + %ext = zext <4 x i8> %val to <4 x i32> + ret <4 x i32> %ext +} From 4afbd3e941177f37b362dffc1731b6cfa288684a Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 9 Jul 2014 11:12:39 +0000 Subject: [PATCH 0699/1155] X86: When lowering v8i32 himuls use the correct shuffle masks for AVX2. Turns out my trick of using the same masks for SSE4.1 and AVX2 didn't work out as we have to blend two vectors. While there remove unecessary cross-lane moves from the shuffles so the backend can lower it to palignr instead of vperm. Fixes PR20118, a miscompilation of vector sdiv by constant on AVX2. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212611 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 +++++++++---- test/CodeGen/X86/vector-idiv.ll | 42 ++++++++++++++++-------------- 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 70d26ee1561a..5f72d5004fd1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15122,7 +15122,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, (VT == MVT::v8i32 && Subtarget->hasInt256())); // Get the high parts. - const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8}; + const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1}; SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask); SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask); @@ -15138,10 +15138,18 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1)); // Shuffle it back into the right order. 
- const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15}; - SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); - const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14}; - SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + SDValue Highs, Lows; + if (VT == MVT::v8i32) { + const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15}; + Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); + const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14}; + Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + } else { + const int HighMask[] = {1, 5, 3, 7}; + Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); + const int LowMask[] = {0, 4, 2, 6}; + Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + } // If we have a signed multiply but no PMULDQ fix up the high parts of a // unsigned multiply. diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll index 4c30184a5426..b6d43e985f0b 100644 --- a/test/CodeGen/X86/vector-idiv.ll +++ b/test/CodeGen/X86/vector-idiv.ll @@ -8,7 +8,7 @@ define <4 x i32> @test1(<4 x i32> %a) { ; SSE41-LABEL: test1: ; SSE41: pmuludq -; SSE41: pshufd $57 +; SSE41: pshufd $49 ; SSE41: pmuludq ; SSE41: shufps $-35 ; SSE41: psubd @@ -18,7 +18,7 @@ define <4 x i32> @test1(<4 x i32> %a) { ; AVX-LABEL: test1: ; AVX: vpmuludq -; AVX: vpshufd $57 +; AVX: vpshufd $49 ; AVX: vpmuludq ; AVX: vshufps $-35 ; AVX: vpsubd @@ -32,11 +32,11 @@ define <8 x i32> @test2(<8 x i32> %a) { ret <8 x i32> %div ; AVX-LABEL: test2: -; AVX: vpermd +; AVX: vpbroadcastd +; AVX: vpalignr $4 ; AVX: vpmuludq -; AVX: vshufps $-35 ; AVX: vpmuludq -; AVX: vshufps $-35 +; AVX: vpblendd $170 ; AVX: vpsubd ; AVX: vpsrld $1 ; AVX: vpadd @@ -107,6 +107,12 @@ define <16 x i16> @test6(<16 x i16> %a) { define <16 x i8> @test7(<16 x i8> %a) { %div = sdiv <16 x i8> %a, ret <16 x i8> %div + +; FIXME: scalarized +; SSE41-LABEL: test7: +; SSE41: pext +; AVX-LABEL: test7: +; AVX: pext } define <4 x i32> @test8(<4 x i32> %a) { @@ -115,8 +121,8 
@@ define <4 x i32> @test8(<4 x i32> %a) { ; SSE41-LABEL: test8: ; SSE41: pmuldq -; SSE41: pshufd $57 -; SSE41-NOT: pshufd $57 +; SSE41: pshufd $49 +; SSE41-NOT: pshufd $49 ; SSE41: pmuldq ; SSE41: shufps $-35 ; SSE41: pshufd $-40 @@ -130,8 +136,8 @@ define <4 x i32> @test8(<4 x i32> %a) { ; SSE: pand ; SSE: paddd ; SSE: pmuludq -; SSE: pshufd $57 -; SSE-NOT: pshufd $57 +; SSE: pshufd $49 +; SSE-NOT: pshufd $49 ; SSE: pmuludq ; SSE: shufps $-35 ; SSE: pshufd $-40 @@ -143,8 +149,8 @@ define <4 x i32> @test8(<4 x i32> %a) { ; AVX-LABEL: test8: ; AVX: vpmuldq -; AVX: vpshufd $57 -; AVX-NOT: vpshufd $57 +; AVX: vpshufd $49 +; AVX-NOT: vpshufd $49 ; AVX: vpmuldq ; AVX: vshufps $-35 ; AVX: vpshufd $-40 @@ -159,12 +165,11 @@ define <8 x i32> @test9(<8 x i32> %a) { ret <8 x i32> %div ; AVX-LABEL: test9: +; AVX: vpalignr $4 ; AVX: vpbroadcastd ; AVX: vpmuldq -; AVX: vshufps $-35 ; AVX: vpmuldq -; AVX: vshufps $-35 -; AVX: vpshufd $-40 +; AVX: vpblendd $170 ; AVX: vpadd ; AVX: vpsrld $31 ; AVX: vpsrad $2 @@ -177,10 +182,10 @@ define <8 x i32> @test10(<8 x i32> %a) { ; AVX-LABEL: test10: ; AVX: vpbroadcastd +; AVX: vpalignr $4 ; AVX: vpmuludq -; AVX: vshufps $-35 ; AVX: vpmuludq -; AVX: vshufps $-35 +; AVX: vpblendd $170 ; AVX: vpsubd ; AVX: vpsrld $1 ; AVX: vpadd @@ -193,12 +198,11 @@ define <8 x i32> @test11(<8 x i32> %a) { ret <8 x i32> %rem ; AVX-LABEL: test11: +; AVX: vpalignr $4 ; AVX: vpbroadcastd ; AVX: vpmuldq -; AVX: vshufps $-35 ; AVX: vpmuldq -; AVX: vshufps $-35 -; AVX: vpshufd $-40 +; AVX: vpblendd $170 ; AVX: vpadd ; AVX: vpsrld $31 ; AVX: vpsrad $2 From f4fcb0ceab8dfca074b843e00e3528215f6e44f5 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 9 Jul 2014 11:13:16 +0000 Subject: [PATCH 0700/1155] Sink two variables only used in an assert into the assert itself. Should fix the release builds with Werror. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212612 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index dc61577d6a3b..4af854ec0c90 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2413,7 +2413,6 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); - unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); // If some legalization strategy other than widening is used on the operand, @@ -2422,8 +2421,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) { if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) return WidenVecOp_Convert(N); InOp = GetWidenedVector(InOp); - EVT InVT = InOp.getValueType(); - assert(NumElts < InVT.getVectorNumElements() && "Input wasn't widened!"); + assert(VT.getVectorNumElements() < + InOp.getValueType().getVectorNumElements() && + "Input wasn't widened!"); // Use a special DAG node to represent the operation of zero extending the // low lanes. From 4c27c85cde1657246ad9291af15899f249026bc7 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 9 Jul 2014 12:36:54 +0000 Subject: [PATCH 0701/1155] [x86] Fix a bug in my new zext-vector-inreg DAG trickery where we were not widening the input type to the node sufficiently to let the ext take place in a register. This would in turn result in a mysterious bitcast assertion failure downstream. First change here is to add back the helpful assert I had in an earlier version of the code to catch this immediately. 
Next change is to add support to the type legalization to detect when we have widened the operand either too little or too much (for whatever reason) and find a size-matched legal vector type to convert it to first. This can also fail so we get a new fallback path, but that seems OK. With this, we no longer crash on vec_cast2.ll when using widening. I've also added the CHECK lines for the zero-extend cases here. We still need to support sign-extend and trunc (or something) to get plausible code for the other two thirds of this test which is one of the regression tests that showed the most scalarization when widening was force-enabled. Slowly closing in on widening being a viable legalization strategy without it resorting to scalarization at every turn. =] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212614 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 33 +++++++++++++++++++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 ++ test/CodeGen/X86/vec_cast2.ll | 9 +++++ 3 files changed, 45 insertions(+) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4af854ec0c90..c50625e20805 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2425,6 +2425,39 @@ SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) { InOp.getValueType().getVectorNumElements() && "Input wasn't widened!"); + // We may need to further widen the operand until it has the same total + // vector size as the result. 
+ EVT InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) { + EVT InEltVT = InVT.getVectorElementType(); + for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { + EVT FixedVT = (MVT::SimpleValueType)i; + EVT FixedEltVT = FixedVT.getVectorElementType(); + if (TLI.isTypeLegal(FixedVT) && + FixedVT.getSizeInBits() == VT.getSizeInBits() && + FixedEltVT == InEltVT) { + assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && + "Not enough elements in the fixed type for the operand!"); + assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && + "We can't have the same type as we started with!"); + if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + else + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + break; + } + } + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) + // We couldn't find a legal vector type that was a widening of the input + // and could be extended in-register to the result type, so we have to + // scalarize. + return WidenVecOp_Convert(N); + } + // Use a special DAG node to represent the operation of zero extending the // low lanes. 
return DAG.getZeroExtendVectorInReg(InOp, DL, VT); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9b65d90383b8..119b0255e7c3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1034,6 +1034,9 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && "The destination vector type must have fewer lanes than the input."); return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll index 5f6e7a853a33..c7f1554cdd4e 100644 --- a/test/CodeGen/X86/vec_cast2.ll +++ b/test/CodeGen/X86/vec_cast2.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE ;CHECK-LABEL: foo1_8: ;CHECK: vcvtdq2ps @@ -19,6 +20,10 @@ define <4 x float> @foo1_4(<4 x i8> %src) { ;CHECK-LABEL: foo2_8: ;CHECK: vcvtdq2ps ;CHECK: ret +; +;CHECK-WIDE-LABEL: foo2_8: +;CHECK-WIDE: vcvtdq2ps %ymm{{.*}}, %ymm{{.*}} +;CHECK-WIDE: ret define <8 x float> @foo2_8(<8 x i8> %src) { %res = uitofp <8 x i8> %src to <8 x float> ret <8 x float> %res @@ -27,6 +32,10 @@ define <8 x float> @foo2_8(<8 x i8> %src) { ;CHECK-LABEL: foo2_4: ;CHECK: vcvtdq2ps ;CHECK: ret +; +;CHECK-WIDE-LABEL: foo2_4: +;CHECK-WIDE: vcvtdq2ps %xmm{{.*}}, %xmm{{.*}} +;CHECK-WIDE: ret define <4 x float> @foo2_4(<4 x i8> %src) { %res = uitofp <4 x 
i8> %src to <4 x float> ret <4 x float> %res From 7fba8d794f5778dcbd1b220d53f5f184e77a19cb Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 9 Jul 2014 13:03:37 +0000 Subject: [PATCH 0702/1155] Generic: add range-adapter for option parsing. I want to use it in lld, but while I'm here I'll update LLVM uses. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212615 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Option/ArgList.h | 7 +++++++ lib/Option/ArgList.cpp | 30 +++++++++++++----------------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index af8cae15fc2d..a0b00b5d87e6 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -150,6 +150,13 @@ class ArgList { return arg_iterator(Args.end(), *this); } + iterator_range filtered(OptSpecifier Id0 = 0U, + OptSpecifier Id1 = 0U, + OptSpecifier Id2 = 0U) const { + return iterator_range(filtered_begin(Id0, Id1, Id2), + filtered_end()); + } + /// @} /// @name Arg Removal /// @{ diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index 28b42a8f8290..5848bb11bfa1 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -234,44 +234,40 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0, void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const { - for (arg_iterator it = filtered_begin(Id0, Id1, Id2), - ie = filtered_end(); it != ie; ++it) { - (*it)->claim(); - (*it)->render(*this, Output); + for (auto Arg: filtered(Id0, Id1, Id2)) { + Arg->claim(); + Arg->render(*this, Output); } } void ArgList::AddAllArgValues(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const { - for (arg_iterator it = filtered_begin(Id0, Id1, Id2), - ie = filtered_end(); it != ie; ++it) { - (*it)->claim(); - for (unsigned i = 0, e = (*it)->getNumValues(); i != e; ++i) - Output.push_back((*it)->getValue(i)); + for 
(auto Arg : filtered(Id0, Id1, Id2)) { + Arg->claim(); + for (unsigned i = 0, e = Arg->getNumValues(); i != e; ++i) + Output.push_back(Arg->getValue(i)); } } void ArgList::AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0, const char *Translation, bool Joined) const { - for (arg_iterator it = filtered_begin(Id0), - ie = filtered_end(); it != ie; ++it) { - (*it)->claim(); + for (auto Arg: filtered(Id0)) { + Arg->claim(); if (Joined) { Output.push_back(MakeArgString(StringRef(Translation) + - (*it)->getValue(0))); + Arg->getValue(0))); } else { Output.push_back(Translation); - Output.push_back((*it)->getValue(0)); + Output.push_back(Arg->getValue(0)); } } } void ArgList::ClaimAllArgs(OptSpecifier Id0) const { - for (arg_iterator it = filtered_begin(Id0), - ie = filtered_end(); it != ie; ++it) - (*it)->claim(); + for (auto Arg : filtered(Id0)) + Arg->claim(); } void ArgList::ClaimAllArgs() const { From 6a6556695e2e3c983a95d6dd78d601f857b861ab Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 9 Jul 2014 16:03:10 +0000 Subject: [PATCH 0703/1155] Add Imagination Technologies to the vendors in llvm::Triple Summary: This is a pre-requisite for supporting the mips-img-linux-gnu triple in clang. 
Differential Revision: http://reviews.llvm.org/D4435 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212626 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/Triple.h | 1 + lib/Support/Triple.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index a10bc734da2c..a2e841162c6e 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -88,6 +88,7 @@ class Triple { BGQ, Freescale, IBM, + ImaginationTechnologies, NVIDIA }; enum OSType { diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 8f3cf7177337..445502d33204 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -115,6 +115,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case BGQ: return "bgq"; case Freescale: return "fsl"; case IBM: return "ibm"; + case ImaginationTechnologies: return "img"; case NVIDIA: return "nvidia"; } @@ -292,6 +293,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("bgq", Triple::BGQ) .Case("fsl", Triple::Freescale) .Case("ibm", Triple::IBM) + .Case("img", Triple::ImaginationTechnologies) .Case("nvidia", Triple::NVIDIA) .Default(Triple::UnknownVendor); } From 296cb7b128114eda207f96c65513335efe46a1d5 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 9 Jul 2014 16:34:54 +0000 Subject: [PATCH 0704/1155] Fix for PR20059 (instcombine reorders shufflevector after instruction that may trap) In PR20059 ( http://llvm.org/pr20059 ), instcombine eliminates shuffles that are necessary before performing an operation that can trap (srem). This patch calls isSafeToSpeculativelyExecute() and bails out of the optimization in SimplifyVectorOp() if needed. 
Differential Revision: http://reviews.llvm.org/D4424 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212629 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstructionCombining.cpp | 6 ++++ test/Transforms/InstCombine/pr20059.ll | 32 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 test/Transforms/InstCombine/pr20059.ll diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 6cba8e37509b..487699c41e95 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -42,6 +42,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -1195,6 +1196,11 @@ static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { if (!Inst.getType()->isVectorTy()) return nullptr; + // It may not be safe to reorder shuffles and things like div, urem, etc. + // because we may trap when executing those ops on unknown vector elements. + // See PR20059. + if (!isSafeToSpeculativelyExecute(&Inst)) return nullptr; + unsigned VWidth = cast(Inst.getType())->getNumElements(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); assert(cast(LHS->getType())->getNumElements() == VWidth); diff --git a/test/Transforms/InstCombine/pr20059.ll b/test/Transforms/InstCombine/pr20059.ll new file mode 100644 index 000000000000..8d7d69bf8765 --- /dev/null +++ b/test/Transforms/InstCombine/pr20059.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed +; for an srem operation. 
This is not a valid optimization because it may cause a trap +; on div-by-zero. + +; CHECK-LABEL: @do_not_reorder +; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2 +define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) { + %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer + %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer + %retval = srem <4 x i32> %splat1, %splat2 + ret <4 x i32> %retval +} +; RUN: opt -S -instcombine < %s | FileCheck %s + +; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed +; for an srem operation. This is not a valid optimization because it may cause a trap +; on div-by-zero. + +; CHECK-LABEL: @do_not_reorder +; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2 +define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) { + %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer + %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer + %retval = srem <4 x i32> %splat1, %splat2 + ret <4 x i32> %retval +} From 90df187c709c4791016003225db270b21eb52605 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 9 Jul 2014 17:49:58 +0000 Subject: [PATCH 0705/1155] removed duplicate testcase git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212632 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Transforms/InstCombine/pr20059.ll | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/test/Transforms/InstCombine/pr20059.ll b/test/Transforms/InstCombine/pr20059.ll index 
8d7d69bf8765..0ef315936ff2 100644 --- a/test/Transforms/InstCombine/pr20059.ll +++ b/test/Transforms/InstCombine/pr20059.ll @@ -14,19 +14,3 @@ define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) { %retval = srem <4 x i32> %splat1, %splat2 ret <4 x i32> %retval } -; RUN: opt -S -instcombine < %s | FileCheck %s - -; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed -; for an srem operation. This is not a valid optimization because it may cause a trap -; on div-by-zero. - -; CHECK-LABEL: @do_not_reorder -; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2 -define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) { - %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer - %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer - %retval = srem <4 x i32> %splat1, %splat2 - ret <4 x i32> %retval -} From 479af7ce0cecd498b71f2810abbab32248a0f83c Mon Sep 17 00:00:00 2001 From: Louis Gerbarg Date: Wed, 9 Jul 2014 17:54:32 +0000 Subject: [PATCH 0706/1155] Make AArch64FastISel::EmitIntExt explicitly check its source and destination types This is a follow up to r212492. There should be no functional difference, but this patch makes it clear that SrcVT must be an i1/i8/i16/i32 and DestVT must be an i8/i16/i32/i64.
rdar://17516686 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212633 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index d822c1bf327b..c4e0ff0c38c9 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1751,9 +1751,14 @@ unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); - // FastISel does not have plumbing to deal with an MVT::i128, if we see one - // so rather than return one we need to bail out to SelectionDAG. - if (DestVT == MVT::i128) + // FastISel does not have plumbing to deal with extensions where the SrcVT or + // DestVT are odd things, so test to make sure that they are both types we can + // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise + // bail out to SelectionDAG. + if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && + (DestVT != MVT::i32) && (DestVT != MVT::i64)) || + ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && + (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) return 0; unsigned Opc; From 074b752cc995e50fd87ea43f223c8f0b99140ee7 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Wed, 9 Jul 2014 18:22:33 +0000 Subject: [PATCH 0707/1155] [X86] AVX512: Enable it in the Loop Vectorizer This lets us experiment with 512-bit vectorization without passing force-vector-width manually. The code generated for a simple integer memset loop is properly vectorized. Disassembly is still broken for it though :(. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212634 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 6 +++- test/Transforms/LoopVectorize/X86/avx512.ll | 35 +++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/LoopVectorize/X86/avx512.ll diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index be9caba506da..c961e2f5b2c8 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -144,13 +144,17 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const { if (Vector && !ST->hasSSE1()) return 0; - if (ST->is64Bit()) + if (ST->is64Bit()) { + if (Vector && ST->hasAVX512()) + return 32; return 16; + } return 8; } unsigned X86TTI::getRegisterBitWidth(bool Vector) const { if (Vector) { + if (ST->hasAVX512()) return 512; if (ST->hasAVX()) return 256; if (ST->hasSSE1()) return 128; return 0; diff --git a/test/Transforms/LoopVectorize/X86/avx512.ll b/test/Transforms/LoopVectorize/X86/avx512.ll new file mode 100644 index 000000000000..a2208668177d --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/avx512.ll @@ -0,0 +1,35 @@ +; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; Verify that we generate 512-bit wide vectors for a basic integer memset +; loop. 
+ +; CHECK-LABEL: f: +; CHECK: vmovdqu32 %zmm{{.}}, ( +; CHECK-NOT: %ymm + +define void @f(i32* %a, i32 %n) { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %n, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} From 9753da175d55783411fe0f24b46c061fc6788482 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Wed, 9 Jul 2014 18:29:55 +0000 Subject: [PATCH 0708/1155] Update ReleaseNotes to mention Atomic NAND semantic changes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212635 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/ReleaseNotes.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 349074a08daa..fb2e248ce36b 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -59,6 +59,10 @@ Non-comprehensive list of changes in this release * The prefix for loop vectorizer hint metadata has been changed from ``llvm.vectorizer`` to ``llvm.loop.vectorize``. +* Some backends previously implemented Atomic NAND(x,y) as ``x & ~y``. Now + all backends implement it as ``~(x & y)``, matching the semantics of GCC 4.4 + and later. + .. NOTE For small 1-3 sentence descriptions, just add an entry at the end of this list. 
If your description won't fit comfortably in one bullet From 08b75dd06173d8c261644b338e06b197a3619ce2 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 9 Jul 2014 18:53:57 +0000 Subject: [PATCH 0709/1155] TargetRegisterInfo: Remove function that fell out of use years ago. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212636 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetRegisterInfo.h | 6 ------ lib/Target/X86/X86RegisterInfo.cpp | 15 --------------- lib/Target/X86/X86RegisterInfo.h | 4 ---- 3 files changed, 25 deletions(-) diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index a162297afc72..c6f3fbf1c849 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -813,12 +813,6 @@ class TargetRegisterInfo : public MCRegisterInfo { /// getFrameRegister - This method should return the register used as a base /// for values allocated in the current stack frame. virtual unsigned getFrameRegister(const MachineFunction &MF) const = 0; - - /// getCompactUnwindRegNum - This function maps the register to the number for - /// compact unwind encoding. Return -1 if the register isn't valid. - virtual int getCompactUnwindRegNum(unsigned, bool) const { - return -1; - } }; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 236e1a4d949a..e8a7e84bb7e3 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -81,21 +81,6 @@ X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI) BasePtr = Is64Bit ? X86::RBX : X86::ESI; } -/// getCompactUnwindRegNum - This function maps the register to the number for -/// compact unwind encoding. Return -1 if the register isn't valid. 
-int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const { - switch (getLLVMRegNum(RegNum, isEH)) { - case X86::EBX: case X86::RBX: return 1; - case X86::ECX: case X86::R12: return 2; - case X86::EDX: case X86::R13: return 3; - case X86::EDI: case X86::R14: return 4; - case X86::ESI: case X86::R15: return 5; - case X86::EBP: case X86::RBP: return 6; - } - - return -1; -} - bool X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { // ExeDepsFixer and PostRAScheduler require liveness. diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index ba346c85a7f9..74efd1fe32d7 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -60,10 +60,6 @@ class X86RegisterInfo final : public X86GenRegisterInfo { // FIXME: This should be tablegen'd like getDwarfRegNum is int getSEHRegNum(unsigned i) const; - /// getCompactUnwindRegNum - This function maps the register to the number for - /// compact unwind encoding. Return -1 if the register isn't valid. - int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const override; - /// Code Generation virtual methods... /// bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; From f634247b1d953ec3d6eac29a520696242344976a Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 9 Jul 2014 18:55:49 +0000 Subject: [PATCH 0710/1155] Change an assert() to a diagnostic. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212637 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/TableGen/CodeGenDAGPatterns.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 00bc9a5bbb9b..2602bbcf6f61 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -2119,9 +2119,11 @@ InferAllTypes(const StringMap > *InNamedTypes) { // If we have input named node types, propagate their types to the named // values here. if (InNamedTypes) { - // FIXME: Should be error? - assert(InNamedTypes->count(I->getKey()) && - "Named node in output pattern but not input pattern?"); + if (!InNamedTypes->count(I->getKey())) { + error("Node '" + std::string(I->getKey()) + + "' in output pattern but not input pattern"); + return true; + } const SmallVectorImpl &InNodes = InNamedTypes->find(I->getKey())->second; From a3edd6a0386363b60ec41ae1407c0149cdd7b259 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 9 Jul 2014 18:55:52 +0000 Subject: [PATCH 0711/1155] AArch64: Better codegen for storing to __fp16. Storing will generally be immediately preceded by rounding from an f32 or f64, so make sure to match those patterns directly to convert into the FPR16 register class directly rather than going through the integer GPRs. This also eliminates an extra step in the convert-from-f64 path which was first converting to f32 and then to f16 from there. 
rdar://17594379 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212638 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.td | 40 ++++++++ test/CodeGen/AArch64/f16-convert.ll | 126 +++++++++++++++++++++++++ 2 files changed, 166 insertions(+) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 000a3be085cc..d7c6d78831f7 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2276,6 +2276,46 @@ def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))), (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; +// When converting to f16 going directly to a store, make sure we use the +// appropriate direct conversion instructions and store via the FPR16 +// registers rather than going through the GPRs. +let AddedComplexity = 10 in { +// f32->f16 +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)), + (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)>; +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), + (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>; +// f64->f16 +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)), + (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)>; +def : Pat< (truncstorei16 
(assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), + (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>; +} + //===----------------------------------------------------------------------===// // Floating point single operand instructions. diff --git a/test/CodeGen/AArch64/f16-convert.ll b/test/CodeGen/AArch64/f16-convert.ll index 5d70fc2bbd15..6fabdc574974 100644 --- a/test/CodeGen/AArch64/f16-convert.ll +++ b/test/CodeGen/AArch64/f16-convert.ll @@ -125,4 +125,130 @@ define double @load9(i16* nocapture readonly %a) nounwind { ret double %conv } +define void @store0(i16* nocapture %a, float %val) nounwind { +; CHECK-LABEL: store0: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret + + %tmp = tail call i16 @llvm.convert.to.fp16(float %val) + store i16 %tmp, i16* %a, align 2 + ret void +} + +define void @store1(i16* nocapture %a, double %val) nounwind { +; CHECK-LABEL: store1: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + store i16 %tmp, i16* %a, align 2 + ret void +} + +define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind { +; CHECK-LABEL: store2: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: str h0, [x0, w1, sxtw #1] +; CHECK-NEXT: ret + + %tmp = tail call i16 @llvm.convert.to.fp16(float %val) + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom + store i16 %tmp, i16* %arrayidx, align 2 
+ ret void +} + +define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind { +; CHECK-LABEL: store3: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: str h0, [x0, w1, sxtw #1] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind { +; CHECK-LABEL: store4: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: str h0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + + %tmp = tail call i16 @llvm.convert.to.fp16(float %val) + %arrayidx = getelementptr inbounds i16* %a, i64 %i + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind { +; CHECK-LABEL: store5: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: str h0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + %arrayidx = getelementptr inbounds i16* %a, i64 %i + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store6(i16* nocapture %a, float %val) nounwind { +; CHECK-LABEL: store6: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: str h0, [x0, #20] +; CHECK-NEXT: ret + + %tmp = tail call i16 @llvm.convert.to.fp16(float %val) + %arrayidx = getelementptr inbounds i16* %a, i64 10 + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store7(i16* nocapture %a, double %val) nounwind { +; CHECK-LABEL: store7: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: str h0, [x0, #20] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + %arrayidx = getelementptr inbounds i16* %a, i64 10 + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store8(i16* nocapture %a, float %val) nounwind { +; CHECK-LABEL: store8: +; 
CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: stur h0, [x0, #-20] +; CHECK-NEXT: ret + + %tmp = tail call i16 @llvm.convert.to.fp16(float %val) + %arrayidx = getelementptr inbounds i16* %a, i64 -10 + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store9(i16* nocapture %a, double %val) nounwind { +; CHECK-LABEL: store9: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: stur h0, [x0, #-20] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + %arrayidx = getelementptr inbounds i16* %a, i64 -10 + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +declare i16 @llvm.convert.to.fp16(float) nounwind readnone declare float @llvm.convert.from.fp16(i16) nounwind readnone From 3e8ed894845c0b66dcd504ed69a507f601cc4f19 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 9 Jul 2014 19:12:07 +0000 Subject: [PATCH 0712/1155] Add trunc (select c, a, b) -> select c (trunc a), (trunc b) combine. Do this if the truncate is free and the select is legal. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212640 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 ++++++++++ test/CodeGen/R600/select64.ll | 35 ++++++++++++++++++++++++ test/CodeGen/X86/shift-parts.ll | 8 ++++-- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7497feb1e661..ab07954a6f16 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6013,6 +6013,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // trunc (select c, a, b) -> select c, (trunc a), (trunc b) + if (N0.getOpcode() == ISD::SELECT) { + EVT SrcVT = N0.getValueType(); + if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && + TLI.isTruncateFree(SrcVT, VT)) { + SDLoc SL(N0); + SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); + SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); + EVT SetCCVT = getSetCCResultType(VT); + SDValue Cond = DAG.getSExtOrTrunc(N0.getOperand(0), SL, SetCCVT); + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); + } + } + // Fold a series of buildvector, bitcast, and truncate if possible. 
// For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to diff --git a/test/CodeGen/R600/select64.ll b/test/CodeGen/R600/select64.ll index 6b87d9865ad6..dba25e3bd21e 100644 --- a/test/CodeGen/R600/select64.ll +++ b/test/CodeGen/R600/select64.ll @@ -13,3 +13,38 @@ entry: store i64 %1, i64 addrspace(1)* %out ret void } + +; CHECK-LABEL: @select_trunc_i64 +; CHECK: V_CNDMASK_B32 +; CHECK-NOT: V_CNDMASK_B32 +define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind { + %cmp = icmp ugt i32 %cond, 5 + %sel = select i1 %cmp, i64 0, i64 %in + %trunc = trunc i64 %sel to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: @select_trunc_i64_2 +; CHECK: V_CNDMASK_B32 +; CHECK-NOT: V_CNDMASK_B32 +define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind { + %cmp = icmp ugt i32 %cond, 5 + %sel = select i1 %cmp, i64 %a, i64 %b + %trunc = trunc i64 %sel to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: @v_select_trunc_i64_2 +; CHECK: V_CNDMASK_B32 +; CHECK-NOT: V_CNDMASK_B32 +define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { + %cmp = icmp ugt i32 %cond, 5 + %a = load i64 addrspace(1)* %aptr, align 8 + %b = load i64 addrspace(1)* %bptr, align 8 + %sel = select i1 %cmp, i64 %a, i64 %b + %trunc = trunc i64 %sel to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll index ce4f538f4de4..763da6397101 100644 --- a/test/CodeGen/X86/shift-parts.ll +++ b/test/CodeGen/X86/shift-parts.ll @@ -1,17 +1,19 @@ -; RUN: llc < %s -march=x86-64 | grep shrdq +; RUN: llc -march=x86-64 < %s | FileCheck %s ; PR4736 %0 = type { i32, i8, [35 x i8] } @g_144 = external global %0, align 8 ; <%0*> [#uses=1] -define i32 @int87(i32 %uint64p_8) nounwind { +; CHECK: shrdq + 
+define i32 @int87(i32 %uint64p_8, i1 %cond) nounwind { entry: %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; [#uses=1] br label %for.cond for.cond: ; preds = %for.cond, %entry - %call3.in.in.in.v = select i1 undef, i320 192, i320 128 ; [#uses=1] + %call3.in.in.in.v = select i1 %cond, i320 192, i320 128 ; [#uses=1] %call3.in.in.in = lshr i320 %srcval4, %call3.in.in.in.v ; [#uses=1] %call3.in = trunc i320 %call3.in.in.in to i32 ; [#uses=1] %tobool = icmp eq i32 %call3.in, 0 ; [#uses=1] From bdd9df49fc747e4068cc6ef4dc6b9e0497dda49c Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 9 Jul 2014 19:14:34 +0000 Subject: [PATCH 0713/1155] Use simpler constructor for range adapter. It is a good idea, it's slightly clearer and simpler. Unfortunately the headline news is: we save one line! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212641 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Option/ArgList.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index a0b00b5d87e6..d46b0e892faf 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -153,8 +153,7 @@ class ArgList { iterator_range filtered(OptSpecifier Id0 = 0U, OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const { - return iterator_range(filtered_begin(Id0, Id1, Id2), - filtered_end()); + return make_range(filtered_begin(Id0, Id1, Id2), filtered_end()); } /// @} From 3e51f754ad08cc453c5e7476bcc6e92743b68830 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Wed, 9 Jul 2014 19:40:08 +0000 Subject: [PATCH 0714/1155] Decouple llvm::SpecialCaseList text representation and its LLVM IR semantics. Turn llvm::SpecialCaseList into a simple class that parses text files in a specified format and knows nothing about LLVM IR. Move this class into LLVMSupport library. Implement two users of this class: * DFSanABIList in DFSan instrumentation pass. 
* SanitizerBlacklist in Clang CodeGen library. The latter will be modified to use actual source-level information from frontend (source file names) instead of unstable LLVM IR things (LLVM Module identifier). Remove dependency edge from ClangCodeGen/ClangDriver to LLVMTransformUtils. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212643 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/SpecialCaseList.h | 94 +++++++ .../llvm/Transforms/Utils/SpecialCaseList.h | 114 --------- lib/Support/CMakeLists.txt | 1 + .../Utils => Support}/SpecialCaseList.cpp | 50 +--- .../Instrumentation/DataFlowSanitizer.cpp | 61 ++++- lib/Transforms/Utils/CMakeLists.txt | 1 - unittests/Support/CMakeLists.txt | 1 + unittests/Support/SpecialCaseListTest.cpp | 126 ++++++++++ unittests/Transforms/Utils/CMakeLists.txt | 1 - .../Transforms/Utils/SpecialCaseList.cpp | 232 ------------------ 10 files changed, 278 insertions(+), 403 deletions(-) create mode 100644 include/llvm/Support/SpecialCaseList.h delete mode 100644 include/llvm/Transforms/Utils/SpecialCaseList.h rename lib/{Transforms/Utils => Support}/SpecialCaseList.cpp (76%) create mode 100644 unittests/Support/SpecialCaseListTest.cpp delete mode 100644 unittests/Transforms/Utils/SpecialCaseList.cpp diff --git a/include/llvm/Support/SpecialCaseList.h b/include/llvm/Support/SpecialCaseList.h new file mode 100644 index 000000000000..192e15ab2211 --- /dev/null +++ b/include/llvm/Support/SpecialCaseList.h @@ -0,0 +1,94 @@ +//===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// +// +// This is a utility class used to parse user-provided text files with +// "special case lists" for code sanitizers. 
Such files are used to +// define "ABI list" for DataFlowSanitizer and blacklists for another sanitizers +// like AddressSanitizer or UndefinedBehaviorSanitizer. +// +// Empty lines and lines starting with "#" are ignored. All the rest lines +// should have the form: +// section:wildcard_expression[=category] +// If category is not specified, it is assumed to be empty string. +// Definitions of "section" and "category" are sanitizer-specific. For example, +// sanitizer blacklists support sections "src", "fun" and "global". +// Wildcard expressions define, respectively, source files, functions or +// globals which shouldn't be instrumented. +// Examples of categories: +// "functional": used in DFSan to list functions with pure functional +// semantics. +// "init": used in ASan blacklist to disable initialization-order bugs +// detection for certain globals or source files. +// Full special case list file example: +// --- +// # Blacklisted items: +// fun:*_ZN4base6subtle* +// global:*global_with_bad_access_or_initialization* +// global:*global_with_initialization_issues*=init +// type:*Namespace::ClassName*=init +// src:file_with_tricky_code.cc +// src:ignore-global-initializers-issues.cc=init +// +// # Functions with pure functional semantics: +// fun:cos=functional +// fun:sin=functional +// --- +// Note that the wild card is in fact an llvm::Regex, but * is automatically +// replaced with .* +// This is similar to the "ignore" feature of ThreadSanitizer. +// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_SPECIALCASELIST_H +#define LLVM_SUPPORT_SPECIALCASELIST_H + +#include "llvm/ADT/StringMap.h" + +namespace llvm { +class MemoryBuffer; +class Regex; +class StringRef; + +class SpecialCaseList { + public: + /// Parses the special case list from a file. If Path is empty, returns + /// an empty special case list. 
On failure, returns 0 and writes an error + /// message to string. + static SpecialCaseList *create(const StringRef Path, std::string &Error); + /// Parses the special case list from a memory buffer. On failure, returns + /// 0 and writes an error message to string. + static SpecialCaseList *create(const MemoryBuffer *MB, std::string &Error); + /// Parses the special case list from a file. On failure, reports a fatal + /// error. + static SpecialCaseList *createOrDie(const StringRef Path); + + ~SpecialCaseList(); + + /// Returns true, if special case list contains a line + /// @Section:=@Category + /// and @Query satisfies a wildcard expression . + bool inSection(const StringRef Section, const StringRef Query, + const StringRef Category = StringRef()) const; + + private: + SpecialCaseList(SpecialCaseList const &) LLVM_DELETED_FUNCTION; + SpecialCaseList &operator=(SpecialCaseList const &) LLVM_DELETED_FUNCTION; + + struct Entry; + StringMap > Entries; + + SpecialCaseList(); + /// Parses just-constructed SpecialCaseList entries from a memory buffer. + bool parse(const MemoryBuffer *MB, std::string &Error); +}; + +} // namespace llvm + +#endif // LLVM_SUPPORT_SPECIALCASELIST_H + diff --git a/include/llvm/Transforms/Utils/SpecialCaseList.h b/include/llvm/Transforms/Utils/SpecialCaseList.h deleted file mode 100644 index 508a6df5dce4..000000000000 --- a/include/llvm/Transforms/Utils/SpecialCaseList.h +++ /dev/null @@ -1,114 +0,0 @@ -//===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions or global -// variables based on a user-supplied list. 
-// -// The list can also specify categories for specific globals, which can be used -// to instruct an instrumentation pass to treat certain functions or global -// variables in a specific way, such as by omitting certain aspects of -// instrumentation while keeping others, or informing the instrumentation pass -// that a specific uninstrumentable function has certain semantics, thus -// allowing the pass to instrument callers according to those semantics. -// -// For example, AddressSanitizer uses the "init" category for globals whose -// initializers should not be instrumented, but which in all other respects -// should be instrumented. -// -// Each line contains a prefix, followed by a colon and a wild card expression, -// followed optionally by an equals sign and an instrumentation-specific -// category. Empty lines and lines starting with "#" are ignored. -// --- -// # Blacklisted items: -// fun:*_ZN4base6subtle* -// global:*global_with_bad_access_or_initialization* -// global:*global_with_initialization_issues*=init -// type:*Namespace::ClassName*=init -// src:file_with_tricky_code.cc -// src:ignore-global-initializers-issues.cc=init -// -// # Functions with pure functional semantics: -// fun:cos=functional -// fun:sin=functional -// --- -// Note that the wild card is in fact an llvm::Regex, but * is automatically -// replaced with .* -// This is similar to the "ignore" feature of ThreadSanitizer. -// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_UTILS_SPECIALCASELIST_H -#define LLVM_TRANSFORMS_UTILS_SPECIALCASELIST_H - -#include "llvm/ADT/StringMap.h" - -namespace llvm { -class Function; -class GlobalAlias; -class GlobalVariable; -class MemoryBuffer; -class Module; -class Regex; -class StringRef; - -class SpecialCaseList { - public: - /// Parses the special case list from a file. 
If Path is empty, returns - /// an empty special case list. On failure, returns 0 and writes an error - /// message to string. - static SpecialCaseList *create(const StringRef Path, std::string &Error); - /// Parses the special case list from a memory buffer. On failure, returns - /// 0 and writes an error message to string. - static SpecialCaseList *create(const MemoryBuffer *MB, std::string &Error); - /// Parses the special case list from a file. On failure, reports a fatal - /// error. - static SpecialCaseList *createOrDie(const StringRef Path); - - ~SpecialCaseList(); - - /// Returns whether either this function or its source file are listed in the - /// given category, which may be omitted to search the empty category. - bool isIn(const Function &F, const StringRef Category = StringRef()) const; - - /// Returns whether this global, its type or its source file are listed in the - /// given category, which may be omitted to search the empty category. - bool isIn(const GlobalVariable &G, - const StringRef Category = StringRef()) const; - - /// Returns whether this global alias is listed in the given category, which - /// may be omitted to search the empty category. - /// - /// If GA aliases a function, the alias's name is matched as a function name - /// would be. Similarly, aliases of globals are matched like globals. - bool isIn(const GlobalAlias &GA, - const StringRef Category = StringRef()) const; - - /// Returns whether this module is listed in the given category, which may be - /// omitted to search the empty category. - bool isIn(const Module &M, const StringRef Category = StringRef()) const; - - private: - SpecialCaseList(SpecialCaseList const &) LLVM_DELETED_FUNCTION; - SpecialCaseList &operator=(SpecialCaseList const &) LLVM_DELETED_FUNCTION; - - struct Entry; - StringMap > Entries; - - SpecialCaseList(); - /// Parses just-constructed SpecialCaseList entries from a memory buffer. 
- bool parse(const MemoryBuffer *MB, std::string &Error); - - bool inSectionCategory(const StringRef Section, const StringRef Query, - const StringRef Category) const; -}; - -} // namespace llvm - -#endif // LLVM_TRANSFORMS_UTILS_SPECIALCASELIST_H diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 033eae041c5f..9ecd55935ea4 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_library(LLVMSupport SmallPtrSet.cpp SmallVector.cpp SourceMgr.cpp + SpecialCaseList.cpp Statistic.cpp StreamableMemoryObject.cpp StringExtras.cpp diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp similarity index 76% rename from lib/Transforms/Utils/SpecialCaseList.cpp rename to lib/Support/SpecialCaseList.cpp index 3d76a17abd90..d9921ac69205 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -14,15 +14,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/SpecialCaseList.h" +#include "llvm/Support/SpecialCaseList.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" @@ -169,48 +165,8 @@ SpecialCaseList::~SpecialCaseList() { } } -bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { - return isIn(*F.getParent(), Category) || - inSectionCategory("fun", F.getName(), Category); -} - -static StringRef GetGlobalTypeString(const GlobalValue &G) { - // Types of GlobalVariables are always pointer types. - Type *GType = G.getType()->getElementType(); - // For now we support blacklisting struct types only. 
- if (StructType *SGType = dyn_cast(GType)) { - if (!SGType->isLiteral()) - return SGType->getName(); - } - return ""; -} - -bool SpecialCaseList::isIn(const GlobalVariable &G, - const StringRef Category) const { - return isIn(*G.getParent(), Category) || - inSectionCategory("global", G.getName(), Category) || - inSectionCategory("type", GetGlobalTypeString(G), Category); -} - -bool SpecialCaseList::isIn(const GlobalAlias &GA, - const StringRef Category) const { - if (isIn(*GA.getParent(), Category)) - return true; - - if (isa(GA.getType()->getElementType())) - return inSectionCategory("fun", GA.getName(), Category); - - return inSectionCategory("global", GA.getName(), Category) || - inSectionCategory("type", GetGlobalTypeString(GA), Category); -} - -bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { - return inSectionCategory("src", M.getModuleIdentifier(), Category); -} - -bool SpecialCaseList::inSectionCategory(const StringRef Section, - const StringRef Query, - const StringRef Category) const { +bool SpecialCaseList::inSection(const StringRef Section, const StringRef Query, + const StringRef Category) const { StringMap >::const_iterator I = Entries.find(Section); if (I == Entries.end()) return false; StringMap::const_iterator II = I->second.find(Category); diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 7f468f79e22d..3b9212a14e66 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -59,9 +59,9 @@ #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/SpecialCaseList.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SpecialCaseList.h" #include using namespace llvm; @@ -120,6 +120,51 @@ static cl::opt ClDebugNonzeroLabels( namespace { +StringRef 
GetGlobalTypeString(const GlobalValue &G) { + // Types of GlobalVariables are always pointer types. + Type *GType = G.getType()->getElementType(); + // For now we support blacklisting struct types only. + if (StructType *SGType = dyn_cast(GType)) { + if (!SGType->isLiteral()) + return SGType->getName(); + } + return ""; +} + +class DFSanABIList { + std::unique_ptr SCL; + + public: + DFSanABIList(SpecialCaseList *SCL) : SCL(SCL) {} + + /// Returns whether either this function or its source file are listed in the + /// given category. + bool isIn(const Function &F, const StringRef Category) const { + return isIn(*F.getParent(), Category) || + SCL->inSection("fun", F.getName(), Category); + } + + /// Returns whether this global alias is listed in the given category. + /// + /// If GA aliases a function, the alias's name is matched as a function name + /// would be. Similarly, aliases of globals are matched like globals. + bool isIn(const GlobalAlias &GA, const StringRef Category) const { + if (isIn(*GA.getParent(), Category)) + return true; + + if (isa(GA.getType()->getElementType())) + return SCL->inSection("fun", GA.getName(), Category); + + return SCL->inSection("global", GA.getName(), Category) || + SCL->inSection("type", GetGlobalTypeString(GA), Category); + } + + /// Returns whether this module is listed in the given category. 
+ bool isIn(const Module &M, const StringRef Category) const { + return SCL->inSection("src", M.getModuleIdentifier(), Category); + } +}; + class DataFlowSanitizer : public ModulePass { friend struct DFSanFunction; friend class DFSanVisitor; @@ -190,7 +235,7 @@ class DataFlowSanitizer : public ModulePass { Constant *DFSanSetLabelFn; Constant *DFSanNonzeroLabelFn; MDNode *ColdCallWeights; - std::unique_ptr ABIList; + DFSanABIList ABIList; DenseMap UnwrappedFnMap; AttributeSet ReadOnlyNoneAttrs; @@ -395,11 +440,11 @@ bool DataFlowSanitizer::doInitialization(Module &M) { } bool DataFlowSanitizer::isInstrumented(const Function *F) { - return !ABIList->isIn(*F, "uninstrumented"); + return !ABIList.isIn(*F, "uninstrumented"); } bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) { - return !ABIList->isIn(*GA, "uninstrumented"); + return !ABIList.isIn(*GA, "uninstrumented"); } DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { @@ -407,11 +452,11 @@ DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { } DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { - if (ABIList->isIn(*F, "functional")) + if (ABIList.isIn(*F, "functional")) return WK_Functional; - if (ABIList->isIn(*F, "discard")) + if (ABIList.isIn(*F, "discard")) return WK_Discard; - if (ABIList->isIn(*F, "custom")) + if (ABIList.isIn(*F, "custom")) return WK_Custom; return WK_Warning; @@ -500,7 +545,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (!DL) return false; - if (ABIList->isIn(M, "skip")) + if (ABIList.isIn(M, "skip")) return false; if (!GetArgTLSPtr) { diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index e10ca90749c5..fcf548f97c5d 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -33,7 +33,6 @@ add_llvm_library(LLVMTransformUtils SimplifyIndVar.cpp SimplifyInstructions.cpp SimplifyLibCalls.cpp - SpecialCaseList.cpp 
UnifyFunctionExitNodes.cpp Utils.cpp ValueMapper.cpp diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt index 08096a4a179a..97c5c43aeb6e 100644 --- a/unittests/Support/CMakeLists.txt +++ b/unittests/Support/CMakeLists.txt @@ -31,6 +31,7 @@ add_llvm_unittest(SupportTests RegexTest.cpp ScaledNumberTest.cpp SourceMgrTest.cpp + SpecialCaseListTest.cpp StringPool.cpp SwapByteOrderTest.cpp ThreadLocalTest.cpp diff --git a/unittests/Support/SpecialCaseListTest.cpp b/unittests/Support/SpecialCaseListTest.cpp new file mode 100644 index 000000000000..bb9c351415bf --- /dev/null +++ b/unittests/Support/SpecialCaseListTest.cpp @@ -0,0 +1,126 @@ +//===- SpecialCaseListTest.cpp - Unit tests for SpecialCaseList -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SpecialCaseList.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +class SpecialCaseListTest : public ::testing::Test { +protected: + SpecialCaseList *makeSpecialCaseList(StringRef List, std::string &Error) { + std::unique_ptr MB(MemoryBuffer::getMemBuffer(List)); + return SpecialCaseList::create(MB.get(), Error); + } + + SpecialCaseList *makeSpecialCaseList(StringRef List) { + std::string Error; + SpecialCaseList *SCL = makeSpecialCaseList(List, Error); + assert(SCL); + assert(Error == ""); + return SCL; + } +}; + +TEST_F(SpecialCaseListTest, Basic) { + std::unique_ptr SCL( + makeSpecialCaseList("# This is a comment.\n" + "\n" + "src:hello\n" + "src:bye\n" + "src:hi=category\n" + "src:z*=category\n")); + EXPECT_TRUE(SCL->inSection("src", "hello")); + EXPECT_TRUE(SCL->inSection("src", "bye")); + EXPECT_TRUE(SCL->inSection("src", "hi", "category")); + EXPECT_TRUE(SCL->inSection("src", "zzzz", 
"category")); + EXPECT_FALSE(SCL->inSection("src", "hi")); + EXPECT_FALSE(SCL->inSection("fun", "hello")); + EXPECT_FALSE(SCL->inSection("src", "hello", "category")); +} + +TEST_F(SpecialCaseListTest, GlobalInitCompat) { + std::unique_ptr SCL( + makeSpecialCaseList("global:foo=init\n")); + EXPECT_FALSE(SCL->inSection("global", "foo")); + EXPECT_FALSE(SCL->inSection("global", "bar")); + EXPECT_TRUE(SCL->inSection("global", "foo", "init")); + EXPECT_FALSE(SCL->inSection("global", "bar", "init")); + + SCL.reset(makeSpecialCaseList("global-init:foo\n")); + EXPECT_FALSE(SCL->inSection("global", "foo")); + EXPECT_FALSE(SCL->inSection("global", "bar")); + EXPECT_TRUE(SCL->inSection("global", "foo", "init")); + EXPECT_FALSE(SCL->inSection("global", "bar", "init")); + + SCL.reset(makeSpecialCaseList("type:t2=init\n")); + EXPECT_FALSE(SCL->inSection("type", "t1")); + EXPECT_FALSE(SCL->inSection("type", "t2")); + EXPECT_FALSE(SCL->inSection("type", "t1", "init")); + EXPECT_TRUE(SCL->inSection("type", "t2", "init")); + + SCL.reset(makeSpecialCaseList("global-init-type:t2\n")); + EXPECT_FALSE(SCL->inSection("type", "t1")); + EXPECT_FALSE(SCL->inSection("type", "t2")); + EXPECT_FALSE(SCL->inSection("type", "t1", "init")); + EXPECT_TRUE(SCL->inSection("type", "t2", "init")); + + SCL.reset(makeSpecialCaseList("src:hello=init\n")); + EXPECT_FALSE(SCL->inSection("src", "hello")); + EXPECT_FALSE(SCL->inSection("src", "bye")); + EXPECT_TRUE(SCL->inSection("src", "hello", "init")); + EXPECT_FALSE(SCL->inSection("src", "bye", "init")); + + SCL.reset(makeSpecialCaseList("global-init-src:hello\n")); + EXPECT_FALSE(SCL->inSection("src", "hello")); + EXPECT_FALSE(SCL->inSection("src", "bye")); + EXPECT_TRUE(SCL->inSection("src", "hello", "init")); + EXPECT_FALSE(SCL->inSection("src", "bye", "init")); +} + +TEST_F(SpecialCaseListTest, Substring) { + std::unique_ptr SCL(makeSpecialCaseList("src:hello\n" + "fun:foo\n" + "global:bar\n")); + EXPECT_FALSE(SCL->inSection("src", "othello")); + 
EXPECT_FALSE(SCL->inSection("fun", "tomfoolery")); + EXPECT_FALSE(SCL->inSection("global", "bartender")); + + SCL.reset(makeSpecialCaseList("fun:*foo*\n")); + EXPECT_TRUE(SCL->inSection("fun", "tomfoolery")); + EXPECT_TRUE(SCL->inSection("fun", "foobar")); +} + +TEST_F(SpecialCaseListTest, InvalidSpecialCaseList) { + std::string Error; + EXPECT_EQ(nullptr, makeSpecialCaseList("badline", Error)); + EXPECT_EQ("Malformed line 1: 'badline'", Error); + EXPECT_EQ(nullptr, makeSpecialCaseList("src:bad[a-", Error)); + EXPECT_EQ("Malformed regex in line 1: 'bad[a-': invalid character range", + Error); + EXPECT_EQ(nullptr, makeSpecialCaseList("src:a.c\n" + "fun:fun(a\n", + Error)); + EXPECT_EQ("Malformed regex in line 2: 'fun(a': parentheses not balanced", + Error); + EXPECT_EQ(nullptr, SpecialCaseList::create("unexisting", Error)); + EXPECT_EQ(0U, Error.find("Can't open file 'unexisting':")); +} + +TEST_F(SpecialCaseListTest, EmptySpecialCaseList) { + std::unique_ptr SCL(makeSpecialCaseList("")); + EXPECT_FALSE(SCL->inSection("foo", "bar")); +} + +} + + diff --git a/unittests/Transforms/Utils/CMakeLists.txt b/unittests/Transforms/Utils/CMakeLists.txt index 60447bb52105..ffa1d49d380b 100644 --- a/unittests/Transforms/Utils/CMakeLists.txt +++ b/unittests/Transforms/Utils/CMakeLists.txt @@ -9,5 +9,4 @@ add_llvm_unittest(UtilsTests Cloning.cpp IntegerDivision.cpp Local.cpp - SpecialCaseList.cpp ) diff --git a/unittests/Transforms/Utils/SpecialCaseList.cpp b/unittests/Transforms/Utils/SpecialCaseList.cpp deleted file mode 100644 index bcbca493af23..000000000000 --- a/unittests/Transforms/Utils/SpecialCaseList.cpp +++ /dev/null @@ -1,232 +0,0 @@ -//===- SpecialCaseList.cpp - Unit tests for SpecialCaseList ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Transforms/Utils/SpecialCaseList.h" -#include "gtest/gtest.h" - -using namespace llvm; - -namespace { - -class SpecialCaseListTest : public ::testing::Test { -protected: - Function *makeFunction(StringRef Name, Module &M) { - return Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false), - GlobalValue::ExternalLinkage, - Name, - &M); - } - - GlobalVariable *makeGlobal(StringRef Name, StringRef StructName, Module &M) { - StructType *ST = - StructType::create(StructName, Type::getInt32Ty(Ctx), (Type*)nullptr); - return new GlobalVariable( - M, ST, false, GlobalValue::ExternalLinkage, nullptr, Name); - } - - GlobalAlias *makeAlias(StringRef Name, GlobalObject *Aliasee) { - return GlobalAlias::create(GlobalValue::ExternalLinkage, Name, Aliasee); - } - - SpecialCaseList *makeSpecialCaseList(StringRef List, std::string &Error) { - std::unique_ptr MB(MemoryBuffer::getMemBuffer(List)); - return SpecialCaseList::create(MB.get(), Error); - } - - SpecialCaseList *makeSpecialCaseList(StringRef List) { - std::string Error; - SpecialCaseList *SCL = makeSpecialCaseList(List, Error); - assert(SCL); - assert(Error == ""); - return SCL; - } - - LLVMContext Ctx; -}; - -TEST_F(SpecialCaseListTest, ModuleIsIn) { - Module M("hello", Ctx); - Function *F = makeFunction("foo", M); - GlobalVariable *GV = makeGlobal("bar", "t", M); - - std::unique_ptr SCL( - makeSpecialCaseList("# This is a comment.\n" - "\n" - "src:hello\n")); - EXPECT_TRUE(SCL->isIn(M)); - EXPECT_TRUE(SCL->isIn(*F)); - EXPECT_TRUE(SCL->isIn(*GV)); - - SCL.reset(makeSpecialCaseList("src:he*o\n")); - EXPECT_TRUE(SCL->isIn(M)); - EXPECT_TRUE(SCL->isIn(*F)); - EXPECT_TRUE(SCL->isIn(*GV)); - - SCL.reset(makeSpecialCaseList("src:hi\n")); - EXPECT_FALSE(SCL->isIn(M)); - 
EXPECT_FALSE(SCL->isIn(*F)); - EXPECT_FALSE(SCL->isIn(*GV)); -} - -TEST_F(SpecialCaseListTest, FunctionIsIn) { - Module M("hello", Ctx); - Function *Foo = makeFunction("foo", M); - Function *Bar = makeFunction("bar", M); - - std::unique_ptr SCL(makeSpecialCaseList("fun:foo\n")); - EXPECT_TRUE(SCL->isIn(*Foo)); - EXPECT_FALSE(SCL->isIn(*Bar)); - - SCL.reset(makeSpecialCaseList("fun:b*\n")); - EXPECT_FALSE(SCL->isIn(*Foo)); - EXPECT_TRUE(SCL->isIn(*Bar)); - - SCL.reset(makeSpecialCaseList("fun:f*\n" - "fun:bar\n")); - EXPECT_TRUE(SCL->isIn(*Foo)); - EXPECT_TRUE(SCL->isIn(*Bar)); - - SCL.reset(makeSpecialCaseList("fun:foo=functional\n")); - EXPECT_TRUE(SCL->isIn(*Foo, "functional")); - StringRef Category; - EXPECT_FALSE(SCL->isIn(*Bar, "functional")); -} - -TEST_F(SpecialCaseListTest, GlobalIsIn) { - Module M("hello", Ctx); - GlobalVariable *Foo = makeGlobal("foo", "t1", M); - GlobalVariable *Bar = makeGlobal("bar", "t2", M); - - std::unique_ptr SCL(makeSpecialCaseList("global:foo\n")); - EXPECT_TRUE(SCL->isIn(*Foo)); - EXPECT_FALSE(SCL->isIn(*Bar)); - EXPECT_FALSE(SCL->isIn(*Foo, "init")); - EXPECT_FALSE(SCL->isIn(*Bar, "init")); - - SCL.reset(makeSpecialCaseList("global:foo=init\n")); - EXPECT_FALSE(SCL->isIn(*Foo)); - EXPECT_FALSE(SCL->isIn(*Bar)); - EXPECT_TRUE(SCL->isIn(*Foo, "init")); - EXPECT_FALSE(SCL->isIn(*Bar, "init")); - - SCL.reset(makeSpecialCaseList("global-init:foo\n")); - EXPECT_FALSE(SCL->isIn(*Foo)); - EXPECT_FALSE(SCL->isIn(*Bar)); - EXPECT_TRUE(SCL->isIn(*Foo, "init")); - EXPECT_FALSE(SCL->isIn(*Bar, "init")); - - SCL.reset(makeSpecialCaseList("type:t2=init\n")); - EXPECT_FALSE(SCL->isIn(*Foo)); - EXPECT_FALSE(SCL->isIn(*Bar)); - EXPECT_FALSE(SCL->isIn(*Foo, "init")); - EXPECT_TRUE(SCL->isIn(*Bar, "init")); - - SCL.reset(makeSpecialCaseList("global-init-type:t2\n")); - EXPECT_FALSE(SCL->isIn(*Foo)); - EXPECT_FALSE(SCL->isIn(*Bar)); - EXPECT_FALSE(SCL->isIn(*Foo, "init")); - EXPECT_TRUE(SCL->isIn(*Bar, "init")); - - 
SCL.reset(makeSpecialCaseList("src:hello=init\n")); - EXPECT_FALSE(SCL->isIn(*Foo)); - EXPECT_FALSE(SCL->isIn(*Bar)); - EXPECT_TRUE(SCL->isIn(*Foo, "init")); - EXPECT_TRUE(SCL->isIn(*Bar, "init")); - - SCL.reset(makeSpecialCaseList("global-init-src:hello\n")); - EXPECT_FALSE(SCL->isIn(*Foo)); - EXPECT_FALSE(SCL->isIn(*Bar)); - EXPECT_TRUE(SCL->isIn(*Foo, "init")); - EXPECT_TRUE(SCL->isIn(*Bar, "init")); -} - -TEST_F(SpecialCaseListTest, AliasIsIn) { - Module M("hello", Ctx); - Function *Foo = makeFunction("foo", M); - GlobalVariable *Bar = makeGlobal("bar", "t", M); - GlobalAlias *FooAlias = makeAlias("fooalias", Foo); - GlobalAlias *BarAlias = makeAlias("baralias", Bar); - - std::unique_ptr SCL(makeSpecialCaseList("fun:foo\n")); - EXPECT_FALSE(SCL->isIn(*FooAlias)); - EXPECT_FALSE(SCL->isIn(*BarAlias)); - - SCL.reset(makeSpecialCaseList("global:bar\n")); - EXPECT_FALSE(SCL->isIn(*FooAlias)); - EXPECT_FALSE(SCL->isIn(*BarAlias)); - - SCL.reset(makeSpecialCaseList("global:fooalias\n")); - EXPECT_FALSE(SCL->isIn(*FooAlias)); - EXPECT_FALSE(SCL->isIn(*BarAlias)); - - SCL.reset(makeSpecialCaseList("fun:fooalias\n")); - EXPECT_TRUE(SCL->isIn(*FooAlias)); - EXPECT_FALSE(SCL->isIn(*BarAlias)); - - SCL.reset(makeSpecialCaseList("global:baralias=init\n")); - EXPECT_FALSE(SCL->isIn(*FooAlias, "init")); - EXPECT_TRUE(SCL->isIn(*BarAlias, "init")); - - SCL.reset(makeSpecialCaseList("type:t=init\n")); - EXPECT_FALSE(SCL->isIn(*FooAlias, "init")); - EXPECT_TRUE(SCL->isIn(*BarAlias, "init")); - - SCL.reset(makeSpecialCaseList("fun:baralias=init\n")); - EXPECT_FALSE(SCL->isIn(*FooAlias, "init")); - EXPECT_FALSE(SCL->isIn(*BarAlias, "init")); -} - -TEST_F(SpecialCaseListTest, Substring) { - Module M("othello", Ctx); - Function *F = makeFunction("tomfoolery", M); - GlobalVariable *GV = makeGlobal("bartender", "t", M); - GlobalAlias *GA1 = makeAlias("buffoonery", F); - GlobalAlias *GA2 = makeAlias("foobar", GV); - - std::unique_ptr SCL(makeSpecialCaseList("src:hello\n" - "fun:foo\n" 
- "global:bar\n")); - EXPECT_FALSE(SCL->isIn(M)); - EXPECT_FALSE(SCL->isIn(*F)); - EXPECT_FALSE(SCL->isIn(*GV)); - EXPECT_FALSE(SCL->isIn(*GA1)); - EXPECT_FALSE(SCL->isIn(*GA2)); - - SCL.reset(makeSpecialCaseList("fun:*foo*\n")); - EXPECT_TRUE(SCL->isIn(*F)); - EXPECT_TRUE(SCL->isIn(*GA1)); -} - -TEST_F(SpecialCaseListTest, InvalidSpecialCaseList) { - std::string Error; - EXPECT_EQ(nullptr, makeSpecialCaseList("badline", Error)); - EXPECT_EQ("Malformed line 1: 'badline'", Error); - EXPECT_EQ(nullptr, makeSpecialCaseList("src:bad[a-", Error)); - EXPECT_EQ("Malformed regex in line 1: 'bad[a-': invalid character range", - Error); - EXPECT_EQ(nullptr, makeSpecialCaseList("src:a.c\n" - "fun:fun(a\n", - Error)); - EXPECT_EQ("Malformed regex in line 2: 'fun(a': parentheses not balanced", - Error); - EXPECT_EQ(nullptr, SpecialCaseList::create("unexisting", Error)); - EXPECT_EQ(0U, Error.find("Can't open file 'unexisting':")); -} - -TEST_F(SpecialCaseListTest, EmptySpecialCaseList) { - std::unique_ptr SCL(makeSpecialCaseList("")); - Module M("foo", Ctx); - EXPECT_FALSE(SCL->isIn(M)); -} - -} From eff0c67b6a54e293917159e54422219eab145791 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 9 Jul 2014 21:02:41 +0000 Subject: [PATCH 0715/1155] Recommit r212203: Don't try to construct debug LexicalScopes hierarchy for functions that do not have top level debug information. Reverted by Eric Christopher (Thanks!) in r212203 after Bob Wilson reported LTO issues. Duncan Exon Smith and Aditya Nandakumar helped provide a reduced reproduction, though the failure wasn't too hard to guess, and even easier with the example to confirm. The assertion that the subprogram metadata associated with an llvm::Function matches the scope data referenced by the DbgLocs on the instructions in that function is not valid under LTO. In LTO, a C++ inline function might exist in multiple CUs and the subprogram metadata nodes will refer to the same llvm::Function. 
In this case, depending on the order of the CUs, the first instance of the subprogram metadata may not be the one referenced by the instructions in that function and the assertion will fail. A test case (test/DebugInfo/cross-cu-linkonce-distinct.ll) is added, the assertion removed and a comment added to explain this situation. Original commit message: If a function isn't actually in a CU's subprogram list in the debug info metadata, ignore all the DebugLocs and don't try to build scopes, track variables, etc. While this is possibly a minor optimization, it's also a correctness fix for an incoming patch that will add assertions to LexicalScopes and the debug info verifier to ensure that all scope chains lead to debug info for the current function. Fix up a few test cases that had broken/incomplete debug info that could violate this constraint. Add a test case where this occurs by design (inlining a debug-info-having function in an attribute nodebug function - we want this to work because /if/ the nodebug function is then inlined into a debug-info-having function, it should be fine (and will work fine - we just stitch the scopes up as usual), but should the inlining not happen we need to not assert fail either). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212649 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 17 +- lib/CodeGen/AsmPrinter/DwarfDebug.h | 2 + lib/CodeGen/LiveDebugVariables.cpp | 15 +- lib/CodeGen/LiveDebugVariables.h | 3 + .../CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll | 2 +- .../X86/dbg-changes-codegen-branch-folding.ll | 169 ++++++++++++++---- test/DebugInfo/X86/concrete_out_of_line.ll | 4 +- test/DebugInfo/cross-cu-linkonce-distinct.ll | 95 ++++++++++ test/DebugInfo/nodebug.ll | 51 ++++++ 9 files changed, 314 insertions(+), 44 deletions(-) create mode 100644 test/DebugInfo/cross-cu-linkonce-distinct.ll create mode 100644 test/DebugInfo/nodebug.ll diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f78ca2c03b4e..77860c0bd105 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -730,6 +730,8 @@ void DwarfDebug::beginModule() { const Module *M = MMI->getModule(); + FunctionDIs = makeSubprogramMap(*M); + // If module has named metadata anchors then use them, otherwise scan the // module using debug info finder to collect debug info. NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); @@ -1415,6 +1417,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; + auto DI = FunctionDIs.find(MF->getFunction()); + if (DI == FunctionDIs.end()) + return; + // Grab the lexical scopes for the function, if we don't have any of those // then we're not going to be able to do anything. LScopes.initialize(*MF); @@ -1430,6 +1436,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // belongs to so that we add to the correct per-cu line table in the // non-asm case. 
LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + // FnScope->getScopeNode() and DI->second should represent the same function, + // though they may not be the same MDNode due to inline functions merged in + // LTO where the debug info metadata still differs (either due to distinct + // written differences - two versions of a linkonce_odr function + // written/copied into two separate files, or some sub-optimal metadata that + // isn't structurally identical (see: file path/name info from clang, which + // includes the directory of the cpp file being built, even when the file name + // is absolute (such as an <> lookup header))) DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); if (Asm->OutStreamer.hasRawTextSupport()) @@ -1527,7 +1541,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { assert(CurFn == MF); assert(CurFn != nullptr); - if (!MMI->hasDebugInfo() || LScopes.empty()) { + if (!MMI->hasDebugInfo() || LScopes.empty() || + !FunctionDIs.count(MF->getFunction())) { // If we don't have a lexical scope for this function then there will // be a hole in the range information. Keep note of this by setting the // previously used section to nullptr. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index f2aa80845a05..ffe4843b9872 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -330,6 +330,8 @@ class DwarfDebug : public AsmPrinterHandler { DwarfAccelTable AccelNamespace; DwarfAccelTable AccelTypes; + DenseMap FunctionDIs; + MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); void addScopeVariable(LexicalScope *LS, DbgVariable *Var); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 388f58fde2ab..7d5646bebfad 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -329,12 +329,13 @@ class LDVImpl { void computeIntervals(); public: - LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false), - ModifiedMF(false) {} + LDVImpl(LiveDebugVariables *ps) + : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {} bool runOnMachineFunction(MachineFunction &mf); /// clear - Release all memory. 
void clear() { + MF = nullptr; userValues.clear(); virtRegToEqClass.clear(); userVarMap.clear(); @@ -693,11 +694,11 @@ void LDVImpl::computeIntervals() { } bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { + clear(); MF = &mf; LIS = &pass.getAnalysis(); MDT = &pass.getAnalysis(); TRI = mf.getTarget().getRegisterInfo(); - clear(); LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << mf.getName() << " **********\n"); @@ -712,6 +713,8 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; + if (!FunctionDIs.count(mf.getFunction())) + return false; if (!pImpl) pImpl = new LDVImpl(this); return static_cast(pImpl)->runOnMachineFunction(mf); @@ -974,6 +977,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, void LDVImpl::emitDebugValues(VirtRegMap *VRM) { DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); + if (!MF) + return; const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); @@ -988,6 +993,10 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { static_cast(pImpl)->emitDebugValues(VRM); } +bool LiveDebugVariables::doInitialization(Module &M) { + FunctionDIs = makeSubprogramMap(M); + return Pass::doInitialization(M); +} #ifndef NDEBUG void LiveDebugVariables::dump() { diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index bb6743547e3d..7ec0d17e42d6 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -22,6 +22,7 @@ #define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -32,6 +33,7 @@ class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { void *pImpl; + 
DenseMap FunctionDIs; public: static char ID; // Pass identification, replacement for typeid @@ -64,6 +66,7 @@ class LiveDebugVariables : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const override; + bool doInitialization(Module &) override; }; diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll index b45ac226a650..4181c269b4a0 100644 --- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll +++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll @@ -24,7 +24,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28} !0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* null, null, null, null, i32 11} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786451, metadata !31, metadata !3, metadata !"foo", i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 3, size 32, align 32, offset 0] [def] [from ] !3 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ] !4 = metadata !{i32 786449, metadata !31, i32 4, metadata !"4.2.1 LLVM build", i1 true, metadata !"", i32 0, metadata !32, metadata !32, metadata !33, null, null, metadata !""} ; [ DW_TAG_compile_unit ] diff --git 
a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll index 23f83352eb2e..4912213e7247 100644 --- a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll +++ b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll @@ -52,58 +52,153 @@ define void @_Z3barii(i32 %param1, i32 %param2) #0 { entry: %var1 = alloca %struct.AAA3, align 1 %var2 = alloca %struct.AAA3, align 1 - %tobool = icmp eq i32 %param2, 0 - br i1 %tobool, label %if.end, label %if.then + tail call void @llvm.dbg.value(metadata !{i32 %param1}, i64 0, metadata !30), !dbg !47 + tail call void @llvm.dbg.value(metadata !{i32 %param2}, i64 0, metadata !31), !dbg !47 + tail call void @llvm.dbg.value(metadata !48, i64 0, metadata !32), !dbg !49 + %tobool = icmp eq i32 %param2, 0, !dbg !50 + br i1 %tobool, label %if.end, label %if.then, !dbg !50 if.then: ; preds = %entry - %call = call i8* @_Z5i2stri(i32 %param2) - br label %if.end + %call = tail call i8* @_Z5i2stri(i32 %param2), !dbg !52 + tail call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !32), !dbg !49 + br label %if.end, !dbg !54 if.end: ; preds = %entry, %if.then - call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !60) - call void @llvm.dbg.value(metadata !62, i64 0, metadata !63) - %arraydecay.i = getelementptr inbounds %struct.AAA3* %var1, i64 0, i32 0, i64 0 - call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)) - call void @llvm.dbg.declare(metadata !{%struct.AAA3* %var2}, metadata !38) - %arraydecay.i5 = getelementptr inbounds %struct.AAA3* %var2, i64 0, i32 0, i64 0 - call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)) - %tobool1 = icmp eq i32 %param1, 0 - br i1 %tobool1, label %if.else, label %if.then2 + tail call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !33), !dbg !55 + tail call void 
@llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !56), !dbg !57 + tail call void @llvm.dbg.value(metadata !58, i64 0, metadata !59), !dbg !60 + %arraydecay.i = getelementptr inbounds %struct.AAA3* %var1, i64 0, i32 0, i64 0, !dbg !61 + call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !61 + call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !34), !dbg !63 + call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !64), !dbg !65 + call void @llvm.dbg.value(metadata !58, i64 0, metadata !66), !dbg !67 + %arraydecay.i5 = getelementptr inbounds %struct.AAA3* %var2, i64 0, i32 0, i64 0, !dbg !68 + call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !68 + %tobool1 = icmp eq i32 %param1, 0, !dbg !69 + call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !34), !dbg !63 + br i1 %tobool1, label %if.else, label %if.then2, !dbg !69 if.then2: ; preds = %if.end - call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)) - br label %if.end3 + call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !71), !dbg !73 + call void @llvm.dbg.value(metadata !74, i64 0, metadata !75), !dbg !76 + call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)), !dbg !76 + br label %if.end3, !dbg !72 if.else: ; preds = %if.end - call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0)) + call void @llvm.dbg.value(metadata !{%struct.AAA3* %var2}, i64 0, metadata !77), !dbg !79 + call void @llvm.dbg.value(metadata !80, i64 0, metadata !81), !dbg !82 + call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0)), !dbg !82 br label %if.end3 if.end3: ; preds = 
%if.else, %if.then2 - call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)) - ret void + call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !33), !dbg !55 + call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !83), !dbg !85 + call void @llvm.dbg.value(metadata !58, i64 0, metadata !86), !dbg !87 + call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !87 + ret void, !dbg !88 } -; Function Attrs: nounwind readnone -declare void @llvm.dbg.declare(metadata, metadata) #1 - -declare i8* @_Z5i2stri(i32) #2 +declare i8* @_Z5i2stri(i32) #1 -declare void @_Z3fooPcjPKc(i8*, i32, i8*) #2 +declare void @_Z3fooPcjPKc(i8*, i32, i8*) #1 ; Function Attrs: nounwind readnone -declare void @llvm.dbg.value(metadata, i64, metadata) #1 +declare void @llvm.dbg.value(metadata, i64, metadata) #2 attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone } -attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.module.flags = !{!48, !49} -!llvm.ident = !{!50} - -!38 = metadata !{i32 786688, null, metadata !"var2", null, i32 20, null, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [var2] [line 20] -!48 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} -!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} -!50 = metadata !{metadata !"clang version 3.5 (202418)"} -!60 = metadata !{i32 786689, null, metadata !"this", null, i32 16777216, null, i32 1088, null} ; [ DW_TAG_arg_variable ] [this] [line 0] -!62 = metadata !{i8* getelementptr inbounds ([1 x i8]* 
@.str, i64 0, i64 0)} -!63 = metadata !{i32 786689, null, metadata !"value", null, i32 33554439, null, i32 0, null} ; [ DW_TAG_arg_variable ] [value] [line 7] +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!44, !45} +!llvm.ident = !{!46} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !3, metadata !23, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"dbg-changes-codegen-branch-folding.cpp", metadata !"/tmp/dbginfo"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !1, null, metadata !"AAA3", i32 4, i64 32, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS4AAA3"} ; [ DW_TAG_structure_type ] [AAA3] [line 4, size 32, align 8, offset 0] [def] [from ] +!5 = metadata !{metadata !6, metadata !11, metadata !17, metadata !18} +!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS4AAA3", metadata !"text", i32 8, i64 32, i64 8, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [text] [line 8, size 32, align 8, offset 0] [from ] +!7 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !8, metadata !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char] +!8 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] +!9 = metadata !{metadata !10} +!10 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3] +!11 = metadata 
!{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"AAA3", metadata !"AAA3", metadata !"", i32 5, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 5} ; [ DW_TAG_subprogram ] [line 5] [AAA3] +!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!13 = metadata !{null, metadata !14, metadata !15} +!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS4AAA3"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4AAA3] +!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!16 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from char] +!17 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"operator=", metadata !"operator=", metadata !"_ZN4AAA3aSEPKc", i32 6, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 6} ; [ DW_TAG_subprogram ] [line 6] [operator=] +!18 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"operator const char *", metadata !"operator const char *", metadata !"_ZNK4AAA3cvPKcEv", i32 7, metadata !19, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 7} ; [ DW_TAG_subprogram ] [line 7] [operator const char *] +!19 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!20 = metadata !{metadata !15, metadata !21} +!21 = metadata !{i32 786447, 
null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from ] +!22 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS4AAA3"} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from _ZTS4AAA3] +!23 = metadata !{metadata !24, metadata !35, metadata !40} +!24 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"bar", metadata !"bar", metadata !"_Z3barii", i32 11, metadata !26, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32, i32)* @_Z3barii, null, null, metadata !29, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [bar] +!25 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp] +!26 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!27 = metadata !{null, metadata !28, metadata !28} +!28 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!29 = metadata !{metadata !30, metadata !31, metadata !32, metadata !33, metadata !34} +!30 = metadata !{i32 786689, metadata !24, metadata !"param1", metadata !25, i32 16777227, metadata !28, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [param1] [line 11] +!31 = metadata !{i32 786689, metadata !24, metadata !"param2", metadata !25, i32 33554443, metadata !28, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [param2] [line 11] +!32 = metadata !{i32 786688, metadata !24, metadata !"temp", metadata !25, i32 12, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [temp] [line 12] +!33 = metadata !{i32 786688, metadata !24, metadata !"var1", metadata !25, i32 17, metadata !"_ZTS4AAA3", i32 0, i32 0} ; [ 
DW_TAG_auto_variable ] [var1] [line 17] +!34 = metadata !{i32 786688, metadata !24, metadata !"var2", metadata !25, i32 18, metadata !"_ZTS4AAA3", i32 0, i32 0} ; [ DW_TAG_auto_variable ] [var2] [line 18] +!35 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"operator=", metadata !"operator=", metadata !"_ZN4AAA3aSEPKc", i32 6, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !17, metadata !36, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [operator=] +!36 = metadata !{metadata !37, metadata !39} +!37 = metadata !{i32 786689, metadata !35, metadata !"this", null, i32 16777216, metadata !38, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0] +!38 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4AAA3"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4AAA3] +!39 = metadata !{i32 786689, metadata !35, metadata !"value", metadata !25, i32 33554438, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [value] [line 6] +!40 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4AAA3", metadata !"AAA3", metadata !"AAA3", metadata !"_ZN4AAA3C2EPKc", i32 5, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !11, metadata !41, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [AAA3] +!41 = metadata !{metadata !42, metadata !43} +!42 = metadata !{i32 786689, metadata !40, metadata !"this", null, i32 16777216, metadata !38, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0] +!43 = metadata !{i32 786689, metadata !40, metadata !"value", metadata !25, i32 33554437, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [value] [line 5] +!44 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!45 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!46 = metadata !{metadata !"clang version 3.5.0 "} +!47 = metadata !{i32 11, i32 0, metadata !24, null} +!48 = metadata !{i8* 
null} +!49 = metadata !{i32 12, i32 0, metadata !24, null} +!50 = metadata !{i32 14, i32 0, metadata !51, null} +!51 = metadata !{i32 786443, metadata !1, metadata !24, i32 14, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp] +!52 = metadata !{i32 15, i32 0, metadata !53, null} +!53 = metadata !{i32 786443, metadata !1, metadata !51, i32 14, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp] +!54 = metadata !{i32 16, i32 0, metadata !53, null} +!55 = metadata !{i32 17, i32 0, metadata !24, null} +!56 = metadata !{i32 786689, metadata !40, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !55} ; [ DW_TAG_arg_variable ] [this] [line 0] +!57 = metadata !{i32 0, i32 0, metadata !40, metadata !55} +!58 = metadata !{i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)} +!59 = metadata !{i32 786689, metadata !40, metadata !"value", metadata !25, i32 33554437, metadata !15, i32 0, metadata !55} ; [ DW_TAG_arg_variable ] [value] [line 5] +!60 = metadata !{i32 5, i32 0, metadata !40, metadata !55} +!61 = metadata !{i32 5, i32 0, metadata !62, metadata !55} +!62 = metadata !{i32 786443, metadata !1, metadata !40, i32 5, i32 0, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp] +!63 = metadata !{i32 18, i32 0, metadata !24, null} +!64 = metadata !{i32 786689, metadata !40, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !63} ; [ DW_TAG_arg_variable ] [this] [line 0] +!65 = metadata !{i32 0, i32 0, metadata !40, metadata !63} +!66 = metadata !{i32 786689, metadata !40, metadata !"value", metadata !25, i32 33554437, metadata !15, i32 0, metadata !63} ; [ DW_TAG_arg_variable ] [value] [line 5] +!67 = metadata !{i32 5, i32 0, metadata !40, metadata !63} +!68 = metadata !{i32 5, i32 0, metadata !62, metadata !63} +!69 = metadata !{i32 20, i32 0, metadata !70, null} +!70 = metadata !{i32 
786443, metadata !1, metadata !24, i32 20, i32 0, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp] +!71 = metadata !{i32 786689, metadata !35, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !72} ; [ DW_TAG_arg_variable ] [this] [line 0] +!72 = metadata !{i32 21, i32 0, metadata !70, null} +!73 = metadata !{i32 0, i32 0, metadata !35, metadata !72} +!74 = metadata !{i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)} +!75 = metadata !{i32 786689, metadata !35, metadata !"value", metadata !25, i32 33554438, metadata !15, i32 0, metadata !72} ; [ DW_TAG_arg_variable ] [value] [line 6] +!76 = metadata !{i32 6, i32 0, metadata !35, metadata !72} +!77 = metadata !{i32 786689, metadata !35, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !78} ; [ DW_TAG_arg_variable ] [this] [line 0] +!78 = metadata !{i32 23, i32 0, metadata !70, null} +!79 = metadata !{i32 0, i32 0, metadata !35, metadata !78} +!80 = metadata !{i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0)} +!81 = metadata !{i32 786689, metadata !35, metadata !"value", metadata !25, i32 33554438, metadata !15, i32 0, metadata !78} ; [ DW_TAG_arg_variable ] [value] [line 6] +!82 = metadata !{i32 6, i32 0, metadata !35, metadata !78} +!83 = metadata !{i32 786689, metadata !35, metadata !"this", null, i32 16777216, metadata !38, i32 1088, metadata !84} ; [ DW_TAG_arg_variable ] [this] [line 0] +!84 = metadata !{i32 24, i32 0, metadata !24, null} +!85 = metadata !{i32 0, i32 0, metadata !35, metadata !84} +!86 = metadata !{i32 786689, metadata !35, metadata !"value", metadata !25, i32 33554438, metadata !15, i32 0, metadata !84} ; [ DW_TAG_arg_variable ] [value] [line 6] +!87 = metadata !{i32 6, i32 0, metadata !35, metadata !84} +!88 = metadata !{i32 25, i32 0, metadata !24, null} diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index 
40300de793d5..ac038f353ada 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -79,7 +79,7 @@ declare void @_Z8moz_freePv(i8*) !0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{} !3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31} -!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [Release] +!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @_ZN17nsAutoRefCnt7ReleaseEv , null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [Release] !6 = metadata !{i32 720937, metadata !59} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !10} @@ -95,7 +95,7 @@ declare void @_Z8moz_freePv(i8*) !18 = metadata !{} !20 = metadata !{metadata !22} !22 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] [line 18] 
[def] [~nsAutoRefCnt] +!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_ZN17nsAutoRefCntD1Ev, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt] !24 = metadata !{metadata !26} !26 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] !27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt] diff --git a/test/DebugInfo/cross-cu-linkonce-distinct.ll b/test/DebugInfo/cross-cu-linkonce-distinct.ll new file mode 100644 index 000000000000..67eb6c0a766d --- /dev/null +++ b/test/DebugInfo/cross-cu-linkonce-distinct.ll @@ -0,0 +1,95 @@ +; REQUIRES: object-emission + +; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +; Testing that two distinct (distinct by writing them in separate files, while +; still fulfilling C++'s ODR by having identical token sequences) functions, +; linked under LTO, get plausible debug info (and don't crash). + +; Built from source: +; $ clang++ a.cpp b.cpp -g -c -emit-llvm +; $ llvm-link a.bc b.bc -o ab.bc + +; This change is intended to tickle a case where the subprogram MDNode +; associated with the llvm::Function will differ from the subprogram +; referenced by the DbgLocs in the function. 
+ +; $ sed -ie "s/!12, !0/!0, !12/" ab.ll +; $ cat a.cpp +; inline int func(int i) { +; return i * 2; +; } +; int (*x)(int) = &func; +; $ cat b.cpp +; inline int func(int i) { +; return i * 2; +; } +; int (*y)(int) = &func; + +; CHECK: DW_TAG_compile_unit +; CHECK: DW_TAG_subprogram +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "func" +; CHECK: DW_TAG_compile_unit + +; FIXME: Maybe we should drop the subprogram here - since the function was +; emitted in one CU, due to linkonce_odr uniquing. We certainly don't emit the +; subprogram here if the source location for this definition is the same (see +; test/DebugInfo/cross-cu-linkonce.ll), though it's very easy to tickle that +; into failing even without duplicating the source as has been done in this +; case (two cpp files in different directories, including the same header that +; contains an inline function - clang will produce distinct subprogram metadata +; that won't deduplicate owing to the file location information containing the +; directory of the source file even though the file name is absolute, not +; relative) + +; CHECK: DW_TAG_subprogram + +@x = global i32 (i32)* @_Z4funci, align 8 +@y = global i32 (i32)* @_Z4funci, align 8 + +; Function Attrs: inlinehint nounwind uwtable +define linkonce_odr i32 @_Z4funci(i32 %i) #0 { + %1 = alloca i32, align 4 + store i32 %i, i32* %1, align 4 + call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !22), !dbg !23 + %2 = load i32* %1, align 4, !dbg !24 + %3 = mul nsw i32 %2, 2, !dbg !24 + ret i32 %3, !dbg !24 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!12, !0} 
+!llvm.module.flags = !{!19, !20} +!llvm.ident = !{!21, !21} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"a.cpp", metadata !"/tmp/dbginfo"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/a.cpp] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8, metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{metadata !10} +!10 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !5, i32 4, metadata !11, i32 0, i32 1, i32 (i32)** @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def] +!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!12 = metadata !{i32 786449, metadata !13, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !14, metadata !17, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus] +!13 = metadata !{metadata !"b.cpp", metadata 
!"/tmp/dbginfo"} +!14 = metadata !{metadata !15} +!15 = metadata !{i32 786478, metadata !13, metadata !16, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func] +!16 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] [/tmp/dbginfo/b.cpp] +!17 = metadata !{metadata !18} +!18 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !16, i32 4, metadata !11, i32 0, i32 1, i32 (i32)** @y, null} ; [ DW_TAG_variable ] [y] [line 4] [def] +!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!21 = metadata !{metadata !"clang version 3.5.0 "} +!22 = metadata !{i32 786689, metadata !4, metadata !"i", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 1] +!23 = metadata !{i32 1, i32 0, metadata !4, null} +!24 = metadata !{i32 2, i32 0, metadata !4, null} diff --git a/test/DebugInfo/nodebug.ll b/test/DebugInfo/nodebug.ll new file mode 100644 index 000000000000..4d86b2463e44 --- /dev/null +++ b/test/DebugInfo/nodebug.ll @@ -0,0 +1,51 @@ +; REQUIRES: object-emission + +; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +; Test that a nodebug function (a function not appearing in the debug info IR +; metadata subprogram list) with DebugLocs on its IR doesn't cause crashes/does +; the right thing. + +; Build with clang from the following: +; extern int i; +; inline __attribute__((always_inline)) void f1() { +; i = 3; +; } +; +; __attribute__((nodebug)) void f2() { +; f1(); +; } + +; Check that there's only one DW_TAG_subprogram, nothing for the 'f2' function. 
+; CHECK: DW_TAG_subprogram +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}} "f1" +; CHECK-NOT: DW_TAG_subprogram + +@i = external global i32 + +; Function Attrs: uwtable +define void @_Z2f2v() #0 { +entry: + store i32 3, i32* @i, align 4, !dbg !11 + ret void +} + +attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/nodebug.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"nodebug.cpp", metadata !"/tmp/dbginfo"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"_Z2f1v", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [f1] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/nodebug.cpp] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{null} +!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!9 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!10 = metadata !{metadata !"clang version 3.5.0 "} +!11 = metadata !{i32 3, i32 0, metadata !4, null} From 515f228ec35f0dbdb72df4f57f356e92b9dfda74 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 9 Jul 2014 22:53:04 +0000 Subject: [PATCH 0716/1155] [SDAG] Make the new 
zext-vector-inreg node default to expand so targets don't need to set it manually. This is based on feedback from Tom who pointed out that if every target needs to handle this we need to reach out to those maintainers. In fact, it doesn't make sense to duplicate everything when anything other than expand seems unlikely at this stage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212661 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TargetLoweringBase.cpp | 5 ++++- lib/Target/X86/X86ISelLowering.cpp | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index cc4445fc4c02..71c609441bb6 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -743,8 +743,11 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. if (VT >= MVT::FIRST_VECTOR_VALUETYPE && - VT <= MVT::LAST_VECTOR_VALUETYPE) + VT <= MVT::LAST_VECTOR_VALUETYPE) { setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, + (MVT::SimpleValueType)VT, Expand); + } } // Most targets ignore the @llvm.prefetch intrinsic. 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5f72d5004fd1..67d2f8e75ed7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -869,7 +869,6 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::TRUNCATE, VT, Expand); setOperationAction(ISD::SIGN_EXTEND, VT, Expand); setOperationAction(ISD::ZERO_EXTEND, VT, Expand); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ANY_EXTEND, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); From 706cbb3337802b0e8e42df1dea937d4f4797ec68 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 10 Jul 2014 01:30:39 +0000 Subject: [PATCH 0717/1155] [dfsan] Handle bitcast aliases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212668 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 4 ++-- test/Instrumentation/DataFlowSanitizer/prefix-rename.ll | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 3b9212a14e66..799e14bdac31 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -602,7 +602,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { ++i; // Don't stop on weak. We assume people aren't playing games with the // instrumentedness of overridden weak aliases. - if (Function *F = dyn_cast(GA->getAliasee())) { + if (auto F = dyn_cast(GA->getBaseObject())) { bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); if (GAInst && FInst) { addGlobalNamePrefix(GA); @@ -612,7 +612,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // below will take care of instrumenting it. 
Function *NewF = buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType()); - GA->replaceAllUsesWith(NewF); + GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType())); NewF->takeName(GA); GA->eraseFromParent(); FnsToInstrument.push_back(NewF); diff --git a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll index 1a5646074d21..f3c36b17b388 100644 --- a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll +++ b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll @@ -8,7 +8,15 @@ module asm ".symver f1,f@@version1" ; CHECK: @"dfs$f2" = alias {{.*}} @"dfs$f1" @f2 = alias void ()* @f1 +; CHECK: @"dfs$g2" = alias {{.*}} @"dfs$g1" +@g2 = alias bitcast (void (i8*)* @g1 to void (i16*)*) + ; CHECK: define void @"dfs$f1" define void @f1() { ret void } + +; CHECK: define void @"dfs$g1" +define void @g1(i8*) { + ret void +} From c4788790f6c11414fb0ab97a2979f76e54dcb580 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 10 Jul 2014 02:24:26 +0000 Subject: [PATCH 0718/1155] [x86] Refactor some of the new code for lowering v16i8 shuffles to remove duplication and make it easier to select different strategies. No functionality changed. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 67d2f8e75ed7..140b2a830ce0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7792,19 +7792,6 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds); } - SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL); - SDValue LoV1 = - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V1, Zero)); - SDValue HiV1 = - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V1, Zero)); - SDValue LoV2 = - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V2, Zero)); - SDValue HiV2 = - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V2, Zero)); int V1LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; int V1HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; @@ -7826,10 +7813,23 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, buildBlendMasks(LoMask, V1LoBlendMask, V2LoBlendMask); buildBlendMasks(HiMask, V1HiBlendMask, V2HiBlendMask); - SDValue V1Lo = DAG.getVectorShuffle(MVT::v8i16, DL, LoV1, HiV1, V1LoBlendMask); - SDValue V2Lo = DAG.getVectorShuffle(MVT::v8i16, DL, LoV2, HiV2, V2LoBlendMask); - SDValue V1Hi = DAG.getVectorShuffle(MVT::v8i16, DL, LoV1, HiV1, V1HiBlendMask); - SDValue V2Hi = DAG.getVectorShuffle(MVT::v8i16, DL, LoV2, HiV2, V2HiBlendMask); + SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL); + + auto buildLoAndHiV8s = + [&](SDValue V, ArrayRef LoBlendMask, ArrayRef HiBlendMask) { + SDValue LoV = + DAG.getNode(ISD::BITCAST, DL, 
MVT::v8i16, + DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); + SDValue HiV = + DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); + SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, LoV, HiV, LoBlendMask); + SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, LoV, HiV, HiBlendMask); + return std::make_pair(BlendedLo, BlendedHi); + }; + SDValue V1Lo, V1Hi, V2Lo, V2Hi; + std::tie(V1Lo, V1Hi) = buildLoAndHiV8s(V1, V1LoBlendMask, V1HiBlendMask); + std::tie(V2Lo, V2Hi) = buildLoAndHiV8s(V2, V2LoBlendMask, V2HiBlendMask); SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Lo, V2Lo, LoMask); SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Hi, V2Hi, HiMask); From 985d66c081a3da2a5a29023f6e361eb0930d818e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 10 Jul 2014 03:22:16 +0000 Subject: [PATCH 0719/1155] Fix types in documentation. The examples were using f32, but the IR type is called float git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212675 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 9886128b77d3..cc9656a7bfa4 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -8651,7 +8651,7 @@ Syntax: :: - declare i16 @llvm.convert.to.fp16(f32 %a) + declare i16 @llvm.convert.to.fp16(float %a) Overview: """"""""" @@ -8679,7 +8679,7 @@ Examples: .. code-block:: llvm - %res = call i16 @llvm.convert.to.fp16(f32 %a) + %res = call i16 @llvm.convert.to.fp16(float %a) store i16 %res, i16* @x, align 2 .. _int_convert_from_fp16: @@ -8692,7 +8692,7 @@ Syntax: :: - declare f32 @llvm.convert.from.fp16(i16 %a) + declare float @llvm.convert.from.fp16(i16 %a) Overview: """"""""" @@ -8721,7 +8721,7 @@ Examples: .. 
code-block:: llvm %a = load i16* @x, align 2 - %res = call f32 @llvm.convert.from.fp16(i16 %a) + %res = call float @llvm.convert.from.fp16(i16 %a) Debugger Intrinsics ------------------- From 425ef825a68916e5cd9a82731dcb0b87da440ecc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 10 Jul 2014 03:22:20 +0000 Subject: [PATCH 0720/1155] R600/SI: Add support for llvm.convert.{to|from}.fp16 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212676 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 8 ++++++-- test/CodeGen/R600/fp16_to_fp32.ll | 14 ++++++++++++++ test/CodeGen/R600/fp32_to_fp16.ll | 14 ++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/R600/fp16_to_fp32.ll create mode 100644 test/CodeGen/R600/fp32_to_fp16.ll diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 291d63455c91..b3b44e2b6801 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1078,8 +1078,12 @@ defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", [(set i32:$dst, (fp_to_sint f32:$src0))] >; defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; -////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; -//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>; +defm V_CVT_F16_F32 : VOP1_32 <0x0000000a, "V_CVT_F16_F32", + [(set i32:$dst, (f32_to_f16 f32:$src0))] +>; +defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", + [(set f32:$dst, (f16_to_f32 i32:$src0))] +>; //defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>; //defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>; //defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>; diff --git a/test/CodeGen/R600/fp16_to_fp32.ll b/test/CodeGen/R600/fp16_to_fp32.ll new file mode 100644 index 000000000000..fa2e379ef7b4 --- /dev/null +++ b/test/CodeGen/R600/fp16_to_fp32.ll @@ -0,0 +1,14 @@ +; RUN: llc 
-march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare i16 @llvm.convert.to.fp16(float) nounwind readnone + +; SI-LABEL: @test_convert_fp16_to_fp32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]] +; SI: V_CVT_F16_F32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +define void @test_convert_fp16_to_fp32(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %val = load float addrspace(1)* %in, align 4 + %cvt = call i16 @llvm.convert.to.fp16(float %val) nounwind readnone + store i16 %cvt, i16 addrspace(1)* %out, align 2 + ret void +} diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll new file mode 100644 index 000000000000..9997cd3c08f1 --- /dev/null +++ b/test/CodeGen/R600/fp32_to_fp16.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.convert.from.fp16(i16) nounwind readnone + +; SI-LABEL: @test_convert_fp16_to_fp32: +; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]] +; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]] +define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { + %val = load i16 addrspace(1)* %in, align 2 + %cvt = call float @llvm.convert.from.fp16(i16 %val) nounwind readnone + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} From a3c15c19b8f3d01f94f318e0ba5a728ae92f2746 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Thu, 10 Jul 2014 03:41:50 +0000 Subject: [PATCH 0721/1155] [AArch64]Fix an assertion failure in DAG Combiner about concating 2 build_vector. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212677 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 22 ++++++++++++++++++---- test/CodeGen/AArch64/arm64-build-vector.ll | 16 +++++++++++++++- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ab07954a6f16..1df041dcdd0f 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10390,10 +10390,24 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SmallVector Opnds; unsigned BuildVecNumElts = N0.getNumOperands(); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); + EVT SclTy0 = N0.getOperand(0)->getValueType(0); + EVT SclTy1 = N1.getOperand(0)->getValueType(0); + if (SclTy0.isFloatingPoint()) { + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N0.getOperand(i)); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N1.getOperand(i)); + } else { + // If BUILD_VECTOR are from built from integer, they may have different + // operand types. Get the smaller type and truncate all operands to it. + EVT MinTy = SclTy0.bitsLE(SclTy1) ? 
SclTy0 : SclTy1; + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N0.getOperand(i))); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N1.getOperand(i))); + } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } diff --git a/test/CodeGen/AArch64/arm64-build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll index 7a3c8cec7d9c..d0f6db080551 100644 --- a/test/CodeGen/AArch64/arm64-build-vector.ll +++ b/test/CodeGen/AArch64/arm64-build-vector.ll @@ -42,4 +42,18 @@ define <8 x i16> @build_all_zero(<8 x i16> %a) #1 { %b = add <8 x i16> %a, %c = mul <8 x i16> %b, ret <8 x i16> %c - } +} + +; There is an optimization in DAG Combiner as following: +; fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) +; -> (BUILD_VECTOR A, B, ..., C, D, ...) +; This case checks when A,B and C,D are different types, there should be no +; assertion failure. +define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) { +; CHECK-LABEL: concat_2_build_vector: +; CHECK: movi + %vshl_n = shl <4 x i16> %in0, + %vshl_n2 = shl <4 x i16> %vshl_n, + %shuffle.i = shufflevector <4 x i16> %vshl_n2, <4 x i16> zeroinitializer, <8 x i32> + ret <8 x i16> %shuffle.i +} \ No newline at end of file From 0f29cefbc253b85272e78595053607ff9206202b Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 10 Jul 2014 03:55:02 +0000 Subject: [PATCH 0722/1155] SpecialCaseList: use std::unique_ptr. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212678 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/SpecialCaseList.cpp | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index d9921ac69205..a81ba23ff6d6 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -35,9 +35,7 @@ namespace llvm { /// literal strings than Regex. struct SpecialCaseList::Entry { StringSet<> Strings; - Regex *RegEx; - - Entry() : RegEx(nullptr) {} + std::unique_ptr RegEx; bool match(StringRef Query) const { return Strings.count(Query) || (RegEx && RegEx->match(Query)); @@ -147,23 +145,13 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { for (StringMap::const_iterator II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { - Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); + Entries[I->getKey()][II->getKey()].RegEx.reset(new Regex(II->getValue())); } } return true; } -SpecialCaseList::~SpecialCaseList() { - for (StringMap >::iterator I = Entries.begin(), - E = Entries.end(); - I != E; ++I) { - for (StringMap::const_iterator II = I->second.begin(), - IE = I->second.end(); - II != IE; ++II) { - delete II->second.RegEx; - } - } -} +SpecialCaseList::~SpecialCaseList() {} bool SpecialCaseList::inSection(const StringRef Section, const StringRef Query, const StringRef Category) const { From ac37f731e78134602ac4b8f7a2ac42a3c4ff4a8f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 10 Jul 2014 04:29:06 +0000 Subject: [PATCH 0723/1155] Explicitly define move constructor and move assignment operator to appease MSVC. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212679 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/SpecialCaseList.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index a81ba23ff6d6..43824891af38 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -34,6 +34,15 @@ namespace llvm { /// reason for doing so is efficiency; StringSet is much faster at matching /// literal strings than Regex. struct SpecialCaseList::Entry { + Entry() {} + Entry(Entry &&Other) + : Strings(std::move(Other.Strings)), RegEx(std::move(Other.RegEx)) {} + Entry &operator=(Entry &&Other) { + Strings = std::move(Other.Strings); + RegEx = std::move(Other.RegEx); + return *this; + } + StringSet<> Strings; std::unique_ptr RegEx; From 95b14b00dae387a1a2035a1fcaae37a00fef8b2e Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 10 Jul 2014 04:34:06 +0000 Subject: [PATCH 0724/1155] [x86] Initial improvements to the new shuffle lowering for v16i8 shuffles specifically for cases where a small subset of the elements in the input vector are actually used. This is specifically targetted at improving the shuffles generated for trunc operations, but also helps out splat-like operations. There is still some really low-hanging fruit here that I want to address but this is a huge step in the right direction. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212680 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 46 +++++++++++++++++----- test/CodeGen/X86/vector-shuffle-128-v16.ll | 38 ++++++++++-------- 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 140b2a830ce0..57b21cce12c4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7815,16 +7815,42 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL); - auto buildLoAndHiV8s = - [&](SDValue V, ArrayRef LoBlendMask, ArrayRef HiBlendMask) { - SDValue LoV = - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); - SDValue HiV = - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); - SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, LoV, HiV, LoBlendMask); - SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, LoV, HiV, HiBlendMask); + auto buildLoAndHiV8s = [&](SDValue V, MutableArrayRef LoBlendMask, + MutableArrayRef HiBlendMask) { + SDValue V1, V2; + // Check if any of the odd lanes in the v16i8 are used. If not, we can mask + // them out and avoid using UNPCK{L,H} to extract the elements of V as + // i16s. + if (std::none_of(LoBlendMask.begin(), LoBlendMask.end(), + [](int M) { return M >= 0 && M % 2 == 1; }) && + std::none_of(HiBlendMask.begin(), HiBlendMask.end(), + [](int M) { return M >= 0 && M % 2 == 1; })) { + // Use a mask to drop the high bytes. + V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V); + V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, V1, + DAG.getConstant(0x00FF, MVT::v8i16)); + + // This will be a single vector shuffle instead of a blend so nuke V2. + V2 = DAG.getUNDEF(MVT::v8i16); + + // Squash the masks to point directly into V1. 
+ for (int &M : LoBlendMask) + if (M >= 0) + M /= 2; + for (int &M : HiBlendMask) + if (M >= 0) + M /= 2; + } else { + // Otherwise just unpack the low half of V into V1 and the high half into + // V2 so that we can blend them as i16s. + V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); + V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); + } + + SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask); + SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask); return std::make_pair(BlendedLo, BlendedHi); }; SDValue V1Lo, V1Hi, V2Lo, V2Hi; diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index 0c317c3dd22f..85d2bb56292f 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -30,11 +30,9 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01( define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08 ; CHECK-SSE2: # BB#0: -; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 -; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 -; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm2 -; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0 -; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0 +; CHECK-SSE2-NEXT: pand +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] @@ -60,18 +58,11 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03( define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SSE2-LABEL: 
@shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12 ; CHECK-SSE2: # BB#0: -; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 -; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 -; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm2 -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm2[0,2,2,3] -; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm2 = xmm2[0,2,2,3,4,5,6,7] -; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7] -; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1] -; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,2,3,4,5,6,7] -; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,7] +; CHECK-SSE2-NEXT: pand +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7] +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,1] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,3,3,4,5,6,7] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,6,7] ; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,1] @@ -189,3 +180,16 @@ define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) { %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle } + +define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) { +; CHECK-SSE2-LABEL: @trunc_v4i32_shuffle +; CHECK-SSE2: # BB#0: +; CHECK-SSE2-NEXT: pand +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7] +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] +; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0 +; CHECK-SSE2-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle +} From 141d3e3015f67604d122618f1ba97c41aaab80b8 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 10 Jul 2014 04:39:40 +0000 Subject: [PATCH 0725/1155] Remove move 
assignment operator to appease older GCCs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212682 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/SpecialCaseList.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index 43824891af38..21e43c5e7035 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -37,11 +37,6 @@ struct SpecialCaseList::Entry { Entry() {} Entry(Entry &&Other) : Strings(std::move(Other.Strings)), RegEx(std::move(Other.RegEx)) {} - Entry &operator=(Entry &&Other) { - Strings = std::move(Other.Strings); - RegEx = std::move(Other.RegEx); - return *this; - } StringSet<> Strings; std::unique_ptr RegEx; From 09f505a0a551743e1c89463dabc4ed407eb271cf Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Thu, 10 Jul 2014 04:50:06 +0000 Subject: [PATCH 0726/1155] MC: add and use an accessor for WinCFI This adds a utility method to access the WinCFI information in bulk and uses that to iterate rather than requesting the count and individually iterating them. This is in preparation for restructuring WinCFI handling to enable more clear sharing across architectures to enable unwind information emission for Windows on ARM. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212683 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCStreamer.h | 4 ++++ lib/MC/MCWin64EH.cpp | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 5bc67adb3556..216de7555475 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -243,6 +243,10 @@ class MCStreamer { return *W64UnwindInfos[i]; } + ArrayRef getW64UnwindInfos() const { + return W64UnwindInfos; + } + void generateCompactUnwindEncodings(MCAsmBackend *MAB); /// @name Assembly File Formatting. 
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index b8b07d3a1808..55e5198ae489 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -274,23 +274,23 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer, llvm::EmitUnwindInfo(streamer, info); } -void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) { - MCContext &context = streamer.getContext(); +void MCWin64EHUnwindEmitter::Emit(MCStreamer &Streamer) { + MCContext &Context = Streamer.getContext(); + // Emit the unwind info structs first. - for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) { - MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i); - const MCSection *xdataSect = - getWin64EHTableSection(GetSectionSuffix(info.Function), context); - streamer.SwitchSection(xdataSect); - llvm::EmitUnwindInfo(streamer, &info); + for (const auto &CFI : Streamer.getW64UnwindInfos()) { + const MCSection *XData = + getWin64EHTableSection(GetSectionSuffix(CFI->Function), Context); + Streamer.SwitchSection(XData); + EmitUnwindInfo(Streamer, CFI); } + // Now emit RUNTIME_FUNCTION entries. - for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) { - MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i); - const MCSection *pdataSect = - getWin64EHFuncTableSection(GetSectionSuffix(info.Function), context); - streamer.SwitchSection(pdataSect); - EmitRuntimeFunction(streamer, &info); + for (const auto &CFI : Streamer.getW64UnwindInfos()) { + const MCSection *PData = + getWin64EHFuncTableSection(GetSectionSuffix(CFI->Function), Context); + Streamer.SwitchSection(PData); + EmitRuntimeFunction(Streamer, CFI); } } From 344517923c49788bbf7a49e75066e20cd674e8fd Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Thu, 10 Jul 2014 04:50:09 +0000 Subject: [PATCH 0727/1155] MC: modernise for loop Convert a for loop to range bsaed form. NFC. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212684 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCWin64EH.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index 55e5198ae489..bb65164787a0 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -20,34 +20,30 @@ namespace llvm { // NOTE: All relocations generated here are 4-byte image-relative. -static uint8_t CountOfUnwindCodes(std::vector &instArray){ - uint8_t count = 0; - for (std::vector::const_iterator I = instArray.begin(), - E = instArray.end(); I != E; ++I) { - switch (I->getOperation()) { +static uint8_t CountOfUnwindCodes(std::vector &Insns) { + uint8_t Count = 0; + for (const auto &I : Insns) { + switch (I.getOperation()) { case Win64EH::UOP_PushNonVol: case Win64EH::UOP_AllocSmall: case Win64EH::UOP_SetFPReg: case Win64EH::UOP_PushMachFrame: - count += 1; + Count += 1; break; case Win64EH::UOP_SaveNonVol: case Win64EH::UOP_SaveXMM128: - count += 2; + Count += 2; break; case Win64EH::UOP_SaveNonVolBig: case Win64EH::UOP_SaveXMM128Big: - count += 3; + Count += 3; break; case Win64EH::UOP_AllocLarge: - if (I->getSize() > 512*1024-8) - count += 3; - else - count += 2; + Count += (I.getSize() > 512 * 1024 - 8) ? 3 : 2; break; } } - return count; + return Count; } static void EmitAbsDifference(MCStreamer &streamer, MCSymbol *lhs, From a739834446a6d057d1af7f5eb172bac3204e757b Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 10 Jul 2014 05:27:53 +0000 Subject: [PATCH 0728/1155] Allow isDereferenceablePointer to look through some bitcasts isDereferenceablePointer should not give up upon encountering any bitcast. If we're casting from a pointer to a larger type to a pointer to a small type, we can continue by examining the bitcast's operand. This missing capability was noted in a comment in the function. 
In order for this to work, isDereferenceablePointer now takes an optional DataLayout pointer (essentially all callers already had such a pointer available). Most code uses isDereferenceablePointer though isSafeToSpeculativelyExecute (which already took an optional DataLayout pointer), and to enable the LICM test case, LICM needs to actually provide its DL pointer to isSafeToSpeculativelyExecute (which it was not doing previously). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212686 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Value.h | 2 +- lib/Analysis/ValueTracking.cpp | 2 +- lib/IR/Value.cpp | 31 +++- lib/Transforms/IPO/ArgumentPromotion.cpp | 14 +- lib/Transforms/Scalar/LICM.cpp | 2 +- lib/Transforms/Scalar/SROA.cpp | 6 +- .../Scalar/ScalarReplAggregates.cpp | 6 +- test/Transforms/LICM/hoist-bitcast-load.ll | 160 ++++++++++++++++++ 8 files changed, 202 insertions(+), 21 deletions(-) create mode 100644 test/Transforms/LICM/hoist-bitcast-load.ll diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index aae39ccbbfb1..b5bbc96eac2a 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -430,7 +430,7 @@ class Value { /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. - bool isDereferenceablePointer() const; + bool isDereferenceablePointer(const DataLayout *DL = nullptr) const; /// DoPHITranslation - If this value is a PHI node with CurBB as its parent, /// return the value in the PHI node corresponding to PredBB. If not, return diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index e66445454171..5264745ca0a6 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -2007,7 +2007,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, // Speculative load may create a race that did not exist in the source. 
LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return false; - return LI->getPointerOperand()->isDereferenceablePointer(); + return LI->getPointerOperand()->isDereferenceablePointer(TD); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast(Inst)) { diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 463024a28c76..d61b8e52ea8f 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InstrTypes.h" @@ -472,18 +473,32 @@ Value *Value::stripInBoundsOffsets() { /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. -static bool isDereferenceablePointer(const Value *V, +static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, SmallPtrSet &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. - // It's also not always safe to follow a bitcast, for example: - // bitcast i8* (alloca i8) to i32* - // would result in a 4-byte load from a 1-byte alloca. Some cases could - // be handled using DataLayout to check sizes and alignments though. // These are obviously ok. if (isa(V)) return true; + // It's not always safe to follow a bitcast, for example: + // bitcast i8* (alloca i8) to i32* + // would result in a 4-byte load from a 1-byte alloca. However, + // if we're casting from a pointer from a type of larger size + // to a type of smaller size (or the same size), and the alignment + // is at least as large as for the resulting pointer type, then + // we can look through the bitcast. 
+ if (DL) + if (const BitCastInst* BC = dyn_cast(V)) { + Type *STy = BC->getSrcTy()->getPointerElementType(), + *DTy = BC->getDestTy()->getPointerElementType(); + if ((DL->getTypeStoreSize(STy) >= + DL->getTypeStoreSize(DTy)) && + (DL->getABITypeAlignment(STy) >= + DL->getABITypeAlignment(DTy))) + return isDereferenceablePointer(BC->getOperand(0), DL, Visited); + } + // Global variables which can't collapse to null are ok. if (const GlobalVariable *GV = dyn_cast(V)) return !GV->hasExternalWeakLinkage(); @@ -497,7 +512,7 @@ static bool isDereferenceablePointer(const Value *V, // Conservatively require that the base pointer be fully dereferenceable. if (!Visited.insert(GEP->getOperand(0))) return false; - if (!isDereferenceablePointer(GEP->getOperand(0), Visited)) + if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited)) return false; // Check the indices. gep_type_iterator GTI = gep_type_begin(GEP); @@ -533,9 +548,9 @@ static bool isDereferenceablePointer(const Value *V, /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. 
-bool Value::isDereferenceablePointer() const { +bool Value::isDereferenceablePointer(const DataLayout *DL) const { SmallPtrSet Visited; - return ::isDereferenceablePointer(this, Visited); + return ::isDereferenceablePointer(this, DL, Visited); } /// DoPHITranslation - If this value is a PHI node with CurBB as its parent, diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 97a119b01bc8..f9de54a173d1 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -68,13 +69,14 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override; static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), maxElements(maxElements) { + : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) { initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } /// A vector used to hold the indices of a single GEP instruction typedef std::vector IndicesVector; + const DataLayout *DL; private: CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; @@ -103,6 +105,9 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. 
@@ -218,7 +223,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { /// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg, + const DataLayout *DL) { Function *Callee = Arg->getParent(); unsigned ArgNo = Arg->getArgNo(); @@ -229,7 +235,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { CallSite CS(U); assert(CS && "Should only have direct calls!"); - if (!CS.getArgument(ArgNo)->isDereferenceablePointer()) + if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL)) return false; } return true; @@ -337,7 +343,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index bc1db3733138..abcceb20050a 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -639,7 +639,7 @@ void LICM::hoist(Instruction &I) { /// bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { // If it is not a trapping instruction, it is always safe to hoist. 
- if (isSafeToSpeculativelyExecute(&Inst)) + if (isSafeToSpeculativelyExecute(&Inst, DL)) return true; return isGuaranteedToExecute(Inst); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 6532b7a09b99..8c7f253290ba 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1130,7 +1130,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, // If this pointer is always safe to load, or if we can prove that there // is already a load in the block, then we can move the load to the pred // block. - if (InVal->isDereferenceablePointer() || + if (InVal->isDereferenceablePointer(DL) || isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL)) continue; @@ -1198,8 +1198,8 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = nullptr) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); - bool TDerefable = TValue->isDereferenceablePointer(); - bool FDerefable = FValue->isDereferenceablePointer(); + bool TDerefable = TValue->isDereferenceablePointer(DL); + bool FDerefable = FValue->isDereferenceablePointer(DL); for (User *U : SI.users()) { LoadInst *LI = dyn_cast(U); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 58192fc02be4..e2a24a7fd4a7 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1142,8 +1142,8 @@ class AllocaPromoter : public LoadAndStorePromoter { /// We can do this to a select if its only uses are loads and if the operand to /// the select can be loaded unconditionally. 
static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { - bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(); - bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(); + bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL); + bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL); for (User *U : SI->users()) { LoadInst *LI = dyn_cast(U); @@ -1226,7 +1226,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { // If this pointer is always safe to load, or if we can prove that there is // already a load in the block, then we can move the load to the pred block. - if (InVal->isDereferenceablePointer() || + if (InVal->isDereferenceablePointer(DL) || isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL)) continue; diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll new file mode 100644 index 000000000000..bb105e59edf5 --- /dev/null +++ b/test/Transforms/LICM/hoist-bitcast-load.ll @@ -0,0 +1,160 @@ +; RUN: opt -S -basicaa -licm < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Make sure the basic alloca pointer hoisting works: +; CHECK-LABEL: @test1 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +; Function Attrs: nounwind uwtable +define void @test1(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %c = alloca i32 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* 
%arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; Make sure the basic alloca pointer hoisting works through a bitcast to a +; pointer to a smaller type: +; CHECK-LABEL: @test2 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +; Function Attrs: nounwind uwtable +define void @test2(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i64 + %c = bitcast i64* %ca to i32* + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; Make sure the basic alloca pointer hoisting works through a bitcast to a +; pointer to a smaller type (where the bitcast also needs to be hoisted): +; CHECK-LABEL: @test3 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +; Function Attrs: nounwind uwtable +define void @test3(i32* nocapture %a, i32* nocapture readonly %b, 
i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i64 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %c = bitcast i64* %ca to i32* + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; Make sure the basic alloca pointer hoisting does not happen through a bitcast +; to a pointer to a larger type: +; CHECK-LABEL: @test4 +; CHECK: for.body: +; CHECK: load i32* %c, align 4 + +; Function Attrs: nounwind uwtable +define void @test4(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i16 + %c = bitcast i16* %ca to i32* + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + 
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +attributes #0 = { nounwind uwtable } + From 5ee0267e4931378841066abc7aa8dec7f1d82abb Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 10 Jul 2014 06:06:11 +0000 Subject: [PATCH 0729/1155] Fix isDereferenceablePointer not to try to take the size of an unsized type. I'll add a test-case shortly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212687 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Value.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index d61b8e52ea8f..35c241a608ba 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -492,7 +492,8 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, if (const BitCastInst* BC = dyn_cast(V)) { Type *STy = BC->getSrcTy()->getPointerElementType(), *DTy = BC->getDestTy()->getPointerElementType(); - if ((DL->getTypeStoreSize(STy) >= + if (STy->isSized() && DTy->isSized() && + (DL->getTypeStoreSize(STy) >= DL->getTypeStoreSize(DTy)) && (DL->getABITypeAlignment(STy) >= DL->getABITypeAlignment(DTy))) From 8892dbf2bbd0eafb0e5ca29eab454d0b691fa9bb Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 10 Jul 2014 07:04:37 +0000 Subject: [PATCH 0730/1155] A test case for not asserting in isDereferenceablePointer upon unsized types This is the test case for r212687. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212688 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Transforms/LICM/hoist-bitcast-load.ll | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll index bb105e59edf5..639dca5f17e0 100644 --- a/test/Transforms/LICM/hoist-bitcast-load.ll +++ b/test/Transforms/LICM/hoist-bitcast-load.ll @@ -156,5 +156,46 @@ for.end: ; preds = %for.inc, %entry ret void } +; Don't crash on bitcasts to unsized types. +; CHECK-LABEL: @test5 +; CHECK: for.body: +; CHECK: load i32* %c, align 4 + +%atype = type opaque + +; Function Attrs: nounwind uwtable +define void @test5(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i16 + %cab = bitcast i16* %ca to %atype* + %c = bitcast %atype* %cab to i32* + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + attributes #0 = { nounwind uwtable } From dc90a3ab8ffc841a442888940635306de6131d2f Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 10 Jul 2014 09:16:40 +0000 Subject: [PATCH 0731/1155] [x86] Tweak the v16i8 
single input special case lowering for shuffles that splat i8s into i16s. Previously, we would try much too hard to arrange a sequence of i8s in one half of the input such that we could unpack them into i16s and shuffle those into place. This isn't always going to be a cheaper i8 shuffle than our other strategies. The case where it is always going to be cheaper is when we can arrange all the necessary inputs into one half using just i16 shuffles. It happens that viewing the problem this way also makes it much easier to produce an efficient set of shuffles to move the inputs into one half and then unpack them. With this, our splat code gets one step closer to being not terrible with the new experimental lowering strategy. It also exposes two combines missing which I will add next. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212692 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 78 ++++++++++++---------- test/CodeGen/X86/vector-shuffle-128-v16.ll | 18 ++--- 2 files changed, 51 insertions(+), 45 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 57b21cce12c4..38551dbba29d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7694,6 +7694,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (isSingleInputShuffleMask(Mask)) { // Check whether we can widen this to an i16 shuffle by duplicating bytes. // Notably, this handles splat and partial-splat shuffles more efficiently. + // However, it only makes sense if the pre-duplication shuffle simplifies + // things significantly. Currently, this means we need to be able to + // express the pre-duplication shuffle as an i16 shuffle. // // FIXME: We should check for other patterns which can be widened into an // i16 shuffle as well. 
@@ -7704,7 +7707,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, } return true; }; - if (canWidenViaDuplication(Mask)) { + auto tryToWidenViaDuplication = [&]() -> SDValue { + if (!canWidenViaDuplication(Mask)) + return SDValue(); SmallVector LoInputs; std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs), [](int M) { return M >= 0 && M < 8; }); @@ -7722,52 +7727,57 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef InPlaceInputs = TargetLo ? LoInputs : HiInputs; ArrayRef MovingInputs = TargetLo ? HiInputs : LoInputs; - int ByteMask[16]; + int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1}; SmallDenseMap LaneMap; - for (int i = 0; i < 16; ++i) - ByteMask[i] = -1; for (int I : InPlaceInputs) { - ByteMask[I] = I; + PreDupI16Shuffle[I/2] = I/2; LaneMap[I] = I; } - int FreeByteIdx = 0; - int TargetOffset = TargetLo ? 0 : 8; - for (int I : MovingInputs) { - // Walk the free index into the byte mask until we find an unoccupied - // spot. We bound this to 8 steps to catch bugs, the pigeonhole - // principle indicates that there *must* be a spot as we can only have - // 8 duplicated inputs. We have to walk the index using modular - // arithmetic to wrap around as necessary. - // FIXME: We could do a much better job of picking an inexpensive slot - // so this doesn't go through the worst case for the byte shuffle. - for (int j = 0; j < 8 && ByteMask[FreeByteIdx + TargetOffset] != -1; - ++j, FreeByteIdx = (FreeByteIdx + 1) % 8) - ; - assert(ByteMask[FreeByteIdx + TargetOffset] == -1 && - "Failed to find a free byte!"); - ByteMask[FreeByteIdx + TargetOffset] = I; - LaneMap[I] = FreeByteIdx + TargetOffset; + int j = TargetLo ? 0 : 4, je = j + 4; + for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) { + // Check if j is already a shuffle of this input. This happens when + // there are two adjacent bytes after we move the low one. 
+ if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) { + // If we haven't yet mapped the input, search for a slot into which + // we can map it. + while (j < je && PreDupI16Shuffle[j] != -1) + ++j; + + if (j == je) + // We can't place the inputs into a single half with a simple i16 shuffle, so bail. + return SDValue(); + + // Map this input with the i16 shuffle. + PreDupI16Shuffle[j] = MovingInputs[i] / 2; + } + + // Update the lane map based on the mapping we ended up with. + LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2; } - V1 = DAG.getVectorShuffle(MVT::v16i8, DL, V1, DAG.getUNDEF(MVT::v16i8), - ByteMask); - for (int &M : Mask) - if (M != -1) - M = LaneMap[M]; + V1 = DAG.getNode( + ISD::BITCAST, DL, MVT::v16i8, + DAG.getVectorShuffle(MVT::v8i16, DL, + DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1), + DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); // Unpack the bytes to form the i16s that will be shuffled into place. V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, MVT::v16i8, V1, V1); - int I16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; for (int i = 0; i < 16; i += 2) { if (Mask[i] != -1) - I16Shuffle[i / 2] = Mask[i] - (TargetLo ? 0 : 8); - assert(I16Shuffle[i / 2] < 8 && "Invalid v8 shuffle mask!"); + PostDupI16Shuffle[i / 2] = LaneMap[Mask[i]] - (TargetLo ? 0 : 8); + assert(PostDupI16Shuffle[i / 2] < 8 && "Invalid v8 shuffle mask!"); } - return DAG.getVectorShuffle(MVT::v8i16, DL, - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1), - DAG.getUNDEF(MVT::v8i16), I16Shuffle); - } + return DAG.getNode( + ISD::BITCAST, DL, MVT::v16i8, + DAG.getVectorShuffle(MVT::v8i16, DL, + DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1), + DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle)); + }; + if (SDValue V = tryToWidenViaDuplication()) + return V; } // Check whether an interleaving lowering is likely to be more efficient. 
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index 85d2bb56292f..fa6bdf8b7296 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -30,14 +30,12 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01( define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08 ; CHECK-SSE2: # BB#0: -; CHECK-SSE2-NEXT: pand ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7] -; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1] ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] -; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6] ; CHECK-SSE2-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -58,16 +56,14 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03( define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12 ; CHECK-SSE2: # BB#0: -; CHECK-SSE2-NEXT: pand +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7] -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,1] -; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,3,3,4,5,6,7] -; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,6,7] -; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] +; CHECK-SSE2-NEXT: pshuflw 
{{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7] ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,1] +; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3] ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7] -; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,6] +; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6] ; CHECK-SSE2-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle From 977aab501d864db76310461400168f7c8cefcf16 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 10 Jul 2014 09:57:36 +0000 Subject: [PATCH 0732/1155] [x86] Expand the target DAG combining for PSHUFD nodes to be able to combine into half-shuffles through unpack instructions that expand the half to a whole vector without messing with the dword lanes. This fixes some redundant instructions in splat-like lowerings for v16i8, which are now getting to be *really* nice. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212695 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 35 +++++++++++++++++++++- test/CodeGen/X86/vector-shuffle-128-v16.ll | 5 +--- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 38551dbba29d..245cb414ca97 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18492,6 +18492,39 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, return false; continue; + + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: + // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword + // shuffle into a preceding word shuffle. + if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16) + return false; + + // Search for a half-shuffle which we can combine with. + unsigned CombineOp = + V.getOpcode() == X86ISD::UNPCKL ? 
X86ISD::PSHUFLW : X86ISD::PSHUFHW; + if (V.getOperand(0) != V.getOperand(1) || + !V->isOnlyUserOf(V.getOperand(0).getNode())) + return false; + V = V.getOperand(0); + do { + switch (V.getOpcode()) { + default: + return false; // Nothing to combine. + + case X86ISD::PSHUFLW: + case X86ISD::PSHUFHW: + if (V.getOpcode() == CombineOp) + break; + + // Fallthrough! + case ISD::BITCAST: + V = V.getOperand(0); + continue; + } + break; + } while (V.hasOneUse()); + break; } // Break out of the loop if we break out of the switch. break; @@ -18508,7 +18541,7 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, SmallVector VMask = getPSHUFShuffleMask(V); for (int &M : Mask) M = VMask[M]; - V = DAG.getNode(X86ISD::PSHUFD, DL, V.getValueType(), V.getOperand(0), + V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0), getV4X86ShuffleImm8ForMask(Mask, DAG)); // It is possible that one of the combinable shuffles was completely absorbed diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index fa6bdf8b7296..71fac00021ff 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -31,9 +31,8 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08( ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08 ; CHECK-SSE2: # BB#0: ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7] +; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7] ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1] ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6] ; CHECK-SSE2-NEXT: retq @@ -59,9 +58,7 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12( ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = 
xmm0[0,2,2,3,4,5,6,7] ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7] ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7] ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3] ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7] ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6] ; CHECK-SSE2-NEXT: retq From b0b3161567dbb50f69b2acacada7b41917c16c6e Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Thu, 10 Jul 2014 10:18:12 +0000 Subject: [PATCH 0733/1155] Make it possible for ints/floats to return different values from getBooleanContents() Summary: On MIPS32r6/MIPS64r6, floating point comparisons return 0 or -1 but integer comparisons return 0 or 1. Updated the various uses of getBooleanContents. Two simplifications had to be disabled when float and int boolean contents differ: - ScalarizeVecRes_VSELECT except when the kind of boolean contents is trivially discoverable (i.e. when the condition of the VSELECT is a SETCC node). - visitVSELECT (select C, 0, 1) -> (xor C, 1). Come to think of it, this one could test for the common case of 'C' being a SETCC too. Preserved existing behaviour for all other targets and updated the affected MIPS32r6/MIPS64r6 tests. This also fixes the pi benchmark where the 'low' variable was counting in the wrong direction because it thought it could simply add the result of the comparison. 
Reviewers: hfinkel Reviewed By: hfinkel Subscribers: hfinkel, jholewinski, mcrosier, llvm-commits Differential Revision: http://reviews.llvm.org/D4389 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212697 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAG.h | 4 +- include/llvm/Target/TargetLowering.h | 33 +++- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 ++-- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 6 +- .../SelectionDAG/LegalizeIntegerTypes.cpp | 10 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 9 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +- .../SelectionDAG/LegalizeVectorOps.cpp | 6 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 24 ++- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 34 +++- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 18 +- lib/CodeGen/TargetLoweringBase.cpp | 1 + lib/Target/Mips/MipsISelLowering.cpp | 5 + test/CodeGen/Mips/fcmp.ll | 168 ++++++++++++------ test/CodeGen/Mips/select.ll | 36 ++-- 15 files changed, 254 insertions(+), 137 deletions(-) diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index c2ae553c778d..0a31905a37f2 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -570,8 +570,8 @@ class SelectionDAG { /// getBoolExtOrTrunc - Convert Op, which must be of integer type, to the /// integer type VT, by using an extension appropriate for the target's - /// BooleanContent or truncating it. - SDValue getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT); + /// BooleanContent for type OpVT or truncating it. + SDValue getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, EVT OpVT); /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 
SDValue getNOT(SDLoc DL, SDValue Val, EVT VT); diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 7caa55d39d69..5e9978d12e8f 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -284,8 +284,17 @@ class TargetLoweringBase { /// selects between the two kinds. For example on X86 a scalar boolean should /// be zero extended from i1, while the elements of a vector of booleans /// should be sign extended from i1. - BooleanContent getBooleanContents(bool isVec) const { - return isVec ? BooleanVectorContents : BooleanContents; + /// + /// Some cpus also treat floating point types the same way as they treat + /// vectors instead of the way they treat scalars. + BooleanContent getBooleanContents(bool isVec, bool isFloat) const { + if (isVec) + return BooleanVectorContents; + return isFloat ? BooleanFloatContents : BooleanContents; + } + + BooleanContent getBooleanContents(EVT Type) const { + return getBooleanContents(Type.isVector(), Type.isFloatingPoint()); } /// Return target scheduling preference. @@ -950,9 +959,19 @@ class TargetLoweringBase { virtual void resetOperationActions() {} protected: - /// Specify how the target extends the result of a boolean value from i1 to a - /// wider type. See getBooleanContents. - void setBooleanContents(BooleanContent Ty) { BooleanContents = Ty; } + /// Specify how the target extends the result of integer and floating point + /// boolean values from i1 to a wider type. See getBooleanContents. + void setBooleanContents(BooleanContent Ty) { + BooleanContents = Ty; + BooleanFloatContents = Ty; + } + + /// Specify how the target extends the result of integer and floating point + /// boolean values from i1 to a wider type. See getBooleanContents. 
+ void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { + BooleanContents = IntTy; + BooleanFloatContents = FloatTy; + } /// Specify how the target extends the result of a vector boolean value from a /// vector of i1 to a wider type. See getBooleanContents. @@ -1496,6 +1515,10 @@ class TargetLoweringBase { /// a type wider than i1. See getBooleanContents. BooleanContent BooleanContents; + /// Information about the contents of the high-bits in boolean values held in + /// a type wider than i1. See getBooleanContents. + BooleanContent BooleanFloatContents; + /// Information about the contents of the high-bits in boolean vector values /// when the element type is wider than i1. See getBooleanContents. BooleanContent BooleanVectorContents; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1df041dcdd0f..df273659e492 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3958,14 +3958,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // If setcc produces all-one true value then: // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<isConstant()) { - if (N0.getOpcode() == ISD::AND && - TLI.getBooleanContents(true) == - TargetLowering::ZeroOrNegativeOneBooleanContent) { + if (N0.getOpcode() == ISD::AND) { SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); BuildVectorSDNode *N01CV = dyn_cast(N01); - if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) { + if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && + TLI.getBooleanContents(N00.getOperand(0).getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); if (C.getNode()) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); @@ -4524,11 +4524,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::OR, 
SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. if (VT.isInteger() && - (VT0 == MVT::i1 || - (VT0.isInteger() && - TLI.getBooleanContents(false) == - TargetLowering::ZeroOrOneBooleanContent)) && + (VT0 == MVT::i1 || (VT0.isInteger() && + TLI.getBooleanContents(false, false) == + TLI.getBooleanContents(false, true) && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) @@ -5077,12 +5086,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + EVT N0VT = N0.getOperand(0).getValueType(); // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == - TargetLowering::ZeroOrNegativeOneBooleanContent) { - EVT N0VT = N0.getOperand(0).getValueType(); + TLI.getBooleanContents(N0VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case. 
@@ -11243,8 +11252,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // fold select C, 16, 0 -> shl C, 4 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && - TLI.getBooleanContents(N0.getValueType().isVector()) == - TargetLowering::ZeroOrOneBooleanContent) { + TLI.getBooleanContents(N0.getValueType()) == + TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b200adcae80f..c0e8c8c2b05b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3761,7 +3761,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType)); + Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType)); break; } case ISD::UADDO: @@ -3779,7 +3779,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); - Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType)); + Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); break; } case ISD::UMULO: @@ -3969,7 +3969,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // illegal; expand it into a SELECT_CC. 
EVT VT = Node->getValueType(0); int TrueValue; - switch (TLI.getBooleanContents(VT.isVector())) { + switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = 1; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index d803976721c5..6feac0de73f4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -519,7 +519,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy)); + Mask = PromoteTargetBoolean(Mask, OpTy); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, SDLoc(N), @@ -919,8 +919,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(MVT::Other); - SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other); // The chain (Op#0) and basic block destination (Op#2) are always legal types. return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, @@ -1013,9 +1012,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ? - OpTy.getScalarType() : OpTy); - Cond = PromoteTargetBoolean(Cond, SVT); + EVT OpVT = N->getOpcode() == ISD::SELECT ? 
OpTy.getScalarType() : OpTy; + Cond = PromoteTargetBoolean(Cond, OpVT); return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), N->getOperand(2)), 0); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 87ffa6a9406b..bd7dacf2bc69 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1065,11 +1065,14 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. -SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { +/// +/// ValVT is the type of values that produced the boolean. +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { SDLoc dl(Bool); + EVT BoolVT = getSetCCResultType(ValVT); ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); - return DAG.getNode(ExtendCode, dl, VT, Bool); + TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT)); + return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index a6bbc218dd22..e52fb3c05e39 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -167,7 +167,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDNode *Node, bool isSigned); std::pair ExpandAtomic(SDNode *Node); - SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); + SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, diff --git 
a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3fa64843a93a..25dc98e0eb9b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -793,9 +793,9 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // FIXME: Sign extend 1 to all ones if thats legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || - TLI.getBooleanContents(true) != - TargetLowering::ZeroOrNegativeOneBooleanContent) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(Op1.getValueType()) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); // If the mask and the type are different sizes, unroll the vector op. This diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index c50625e20805..415d4e3f5073 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -257,8 +257,26 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { SDValue Cond = GetScalarizedVector(N->getOperand(0)); SDValue LHS = GetScalarizedVector(N->getOperand(1)); - TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); - TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + TargetLowering::BooleanContent ScalarBool = + TLI.getBooleanContents(false, false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false); + + // If integer and float booleans have different contents then we can't + // reliably optimize in all cases. 
There is a full explanation for this in + // DAGCombiner::visitSELECT() where the same issue affects folding + // (select C, 0, 1) to (xor C, 1). + if (TLI.getBooleanContents(false, false) != + TLI.getBooleanContents(false, true)) { + // At least try the common case where the boolean is generated by a + // comparison. + if (Cond->getOpcode() == ISD::SETCC) { + EVT OpVT = Cond->getOperand(0)->getValueType(0); + ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); + VecBool = TLI.getBooleanContents(OpVT); + } else + ScalarBool = TargetLowering::UndefinedBooleanContent; + } + if (ScalarBool != VecBool) { EVT CondVT = Cond.getValueType(); switch (ScalarBool) { @@ -357,7 +375,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { // Vectors may have a different boolean contents to scalars. Promote the // value appropriately. ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(true)); + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); return DAG.getNode(ExtendCode, DL, NVT, Res); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 119b0255e7c3..6a3fa7aef1fc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1012,11 +1012,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT) { +SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, + EVT OpVT) { if (VT.bitsLE(Op.getValueType())) return getNode(ISD::TRUNCATE, SL, VT, Op); - TargetLowering::BooleanContent BType = TLI->getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT); return getNode(TLI->getExtendForContent(BType), SL, VT, Op); } @@ -1054,7 +1055,7 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { SDValue 
SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue TrueValue; - switch (TLI->getBooleanContents(VT.isVector())) { + switch (TLI->getBooleanContents(VT)) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = getConstant(1, VT); @@ -1776,7 +1777,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETTRUE: case ISD::SETTRUE2: { const TargetLowering *TLI = TM.getTargetLowering(); - TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent Cnt = + TLI->getBooleanContents(N1->getValueType(0)); return getConstant( Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); } @@ -2007,11 +2009,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::UMULO: if (Op.getResNo() != 1) break; - // The boolean result conforms to getBooleanContents. Fall through. + // The boolean result conforms to getBooleanContents. + // If we know the result of a setcc has the top bits zero, use this info. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. 
- if (TLI->getBooleanContents(Op.getValueType().isVector()) == - TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ISD::SHL: @@ -2410,9 +2421,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. Fall through. + // If setcc returns 0/-1, all bits are sign bits. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. - if (TLI->getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 578821a19f5c..42372a21ed7a 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1150,14 +1150,12 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!N) return false; - bool IsVec = false; const ConstantSDNode *CN = dyn_cast(N); if (!CN) { const BuildVectorSDNode *BV = dyn_cast(N); if (!BV) return false; - IsVec = true; BitVector UndefElements; CN = BV->getConstantSplatNode(&UndefElements); // Only interested in constant splats, and we don't try to handle undef @@ -1166,7 +1164,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { return false; } - switch (getBooleanContents(IsVec)) { + switch (getBooleanContents(N->getValueType(0))) { case UndefinedBooleanContent: return 
CN->getAPIntValue()[0]; case ZeroOrOneBooleanContent: @@ -1182,14 +1180,12 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (!N) return false; - bool IsVec = false; const ConstantSDNode *CN = dyn_cast(N); if (!CN) { const BuildVectorSDNode *BV = dyn_cast(N); if (!BV) return false; - IsVec = true; BitVector UndefElements; CN = BV->getConstantSplatNode(&UndefElements); // Only interested in constant splats, and we don't try to handle undef @@ -1198,7 +1194,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { return false; } - if (getBooleanContents(IsVec) == UndefinedBooleanContent) + if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) return !CN->getAPIntValue()[0]; return CN->isNullValue(); @@ -1219,7 +1215,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETFALSE2: return DAG.getConstant(0, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent Cnt = + getBooleanContents(N0->getValueType(0)); return DAG.getConstant( Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? 
-1ULL : 1, VT); } @@ -1426,7 +1423,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), NewConst, Cond); - return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT); + return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType()); } break; } @@ -1510,7 +1507,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } else if (N1C->getAPIntValue() == 1 && (VT == MVT::i1 || - getBooleanContents(false) == ZeroOrOneBooleanContent)) { + getBooleanContents(N0->getValueType(0)) == + ZeroOrOneBooleanContent)) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -1781,7 +1779,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. uint64_t EqVal = 0; - switch (getBooleanContents(N0.getValueType().isVector())) { + switch (getBooleanContents(N0.getValueType())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: EqVal = ISD::isTrueWhenEqual(Cond); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 71c609441bb6..afa58e780ffe 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -690,6 +690,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; + BooleanFloatContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::ILP; JumpBufSize = 0; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index edbba2b089c2..b7af2d4aaf48 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -215,6 +215,11 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM) // setcc operations results (slt, sgt, ...). 
setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + // The cmp.cond.fmt instruction in MIPS32r6/MIPS64r6 uses 0 and -1 like MSA + // does. Integer booleans still use 0 and 1. + if (Subtarget->hasMips32r6()) + setBooleanContents(ZeroOrOneBooleanContent, + ZeroOrNegativeOneBooleanContent); // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll index dce8a7d6da58..b7759831c5a2 100644 --- a/test/CodeGen/Mips/fcmp.ll +++ b/test/CodeGen/Mips/fcmp.ll @@ -29,10 +29,12 @@ define i32 @oeq_f32(float %a, float %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.eq.s $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp oeq float %a, %b %2 = zext i1 %1 to i32 @@ -53,10 +55,12 @@ define i32 @ogt_f32(float %a, float %b) nounwind { ; 64-C-DAG: movf $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f14, $f12 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ogt float %a, %b %2 = zext i1 %1 to i32 @@ -77,10 +81,12 @@ define i32 @oge_f32(float %a, float %b) nounwind { ; 64-C-DAG: movf $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f14, $f12 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi 
$2, $[[T1]], 1 %1 = fcmp oge float %a, %b %2 = zext i1 %1 to i32 @@ -101,10 +107,12 @@ define i32 @olt_f32(float %a, float %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.lt.s $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp olt float %a, %b %2 = zext i1 %1 to i32 @@ -125,10 +133,12 @@ define i32 @ole_f32(float %a, float %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.le.s $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ole float %a, %b %2 = zext i1 %1 to i32 @@ -150,11 +160,13 @@ define i32 @one_f32(float %a, float %b) nounwind { ; 32-CMP-DAG: cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 32-CMP-DAG: not $2, $[[T1]] +; 32-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 32-CMP-DAG: andi $2, $[[T2]], 1 ; 64-CMP-DAG: cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 64-CMP-DAG: not $2, $[[T1]] +; 64-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 64-CMP-DAG: andi $2, $[[T2]], 1 %1 = fcmp one float %a, %b %2 = zext i1 %1 to i32 @@ -176,11 +188,13 @@ define i32 @ord_f32(float %a, float %b) nounwind { ; 32-CMP-DAG: cmp.un.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 32-CMP-DAG: not $2, $[[T1]] +; 32-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 32-CMP-DAG: andi $2, $[[T2]], 1 ; 64-CMP-DAG: cmp.un.s $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 64-CMP-DAG: not $2, $[[T1]] +; 64-CMP-DAG: not $[[T2:[0-9]+]], 
$[[T1]] +; 64-CMP-DAG: andi $2, $[[T2]], 1 %1 = fcmp ord float %a, %b %2 = zext i1 %1 to i32 @@ -201,10 +215,12 @@ define i32 @ueq_f32(float %a, float %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ueq.s $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ueq float %a, %b %2 = zext i1 %1 to i32 @@ -225,10 +241,12 @@ define i32 @ugt_f32(float %a, float %b) nounwind { ; 64-C-DAG: movf $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ult.s $[[T0:f[0-9]+]], $f14, $f12 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ult.s $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ugt float %a, %b %2 = zext i1 %1 to i32 @@ -249,10 +267,12 @@ define i32 @uge_f32(float %a, float %b) nounwind { ; 64-C-DAG: movf $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ule.s $[[T0:f[0-9]+]], $f14, $f12 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ule.s $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp uge float %a, %b %2 = zext i1 %1 to i32 @@ -273,10 +293,12 @@ define i32 @ult_f32(float %a, float %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ult.s $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ult.s $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ult 
float %a, %b %2 = zext i1 %1 to i32 @@ -297,10 +319,12 @@ define i32 @ule_f32(float %a, float %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ule.s $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ule.s $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ule float %a, %b %2 = zext i1 %1 to i32 @@ -322,11 +346,13 @@ define i32 @une_f32(float %a, float %b) nounwind { ; 32-CMP-DAG: cmp.eq.s $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 32-CMP-DAG: not $2, $[[T1]] +; 32-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 32-CMP-DAG: andi $2, $[[T2]], 1 ; 64-CMP-DAG: cmp.eq.s $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 64-CMP-DAG: not $2, $[[T1]] +; 64-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 64-CMP-DAG: andi $2, $[[T2]], 1 %1 = fcmp une float %a, %b %2 = zext i1 %1 to i32 @@ -347,10 +373,12 @@ define i32 @uno_f32(float %a, float %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.un.s $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.un.s $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp uno float %a, %b %2 = zext i1 %1 to i32 @@ -389,10 +417,12 @@ define i32 @oeq_f64(double %a, double %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.eq.d $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp oeq double %a, %b %2 
= zext i1 %1 to i32 @@ -413,10 +443,12 @@ define i32 @ogt_f64(double %a, double %b) nounwind { ; 64-C-DAG: movf $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f14, $f12 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ogt double %a, %b %2 = zext i1 %1 to i32 @@ -437,10 +469,12 @@ define i32 @oge_f64(double %a, double %b) nounwind { ; 64-C-DAG: movf $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f14, $f12 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp oge double %a, %b %2 = zext i1 %1 to i32 @@ -461,10 +495,12 @@ define i32 @olt_f64(double %a, double %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.lt.d $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp olt double %a, %b %2 = zext i1 %1 to i32 @@ -485,10 +521,12 @@ define i32 @ole_f64(double %a, double %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.le.d $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ole double %a, %b %2 = zext i1 %1 to i32 @@ -510,11 +548,13 @@ define i32 
@one_f64(double %a, double %b) nounwind { ; 32-CMP-DAG: cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 32-CMP-DAG: not $2, $[[T1]] +; 32-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 32-CMP-DAG: andi $2, $[[T2]], 1 ; 64-CMP-DAG: cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 64-CMP-DAG: not $2, $[[T1]] +; 64-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 64-CMP-DAG: andi $2, $[[T2]], 1 %1 = fcmp one double %a, %b %2 = zext i1 %1 to i32 @@ -536,11 +576,13 @@ define i32 @ord_f64(double %a, double %b) nounwind { ; 32-CMP-DAG: cmp.un.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 32-CMP-DAG: not $2, $[[T1]] +; 32-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 32-CMP-DAG: andi $2, $[[T2]], 1 ; 64-CMP-DAG: cmp.un.d $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 64-CMP-DAG: not $2, $[[T1]] +; 64-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 64-CMP-DAG: andi $2, $[[T2]], 1 %1 = fcmp ord double %a, %b %2 = zext i1 %1 to i32 @@ -561,10 +603,12 @@ define i32 @ueq_f64(double %a, double %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ueq.d $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ueq double %a, %b %2 = zext i1 %1 to i32 @@ -585,10 +629,12 @@ define i32 @ugt_f64(double %a, double %b) nounwind { ; 64-C-DAG: movf $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ult.d $[[T0:f[0-9]+]], $f14, $f12 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ult.d $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ugt double %a, %b %2 = zext i1 
%1 to i32 @@ -609,10 +655,12 @@ define i32 @uge_f64(double %a, double %b) nounwind { ; 64-C-DAG: movf $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ule.d $[[T0:f[0-9]+]], $f14, $f12 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ule.d $[[T0:f[0-9]+]], $f13, $f12 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp uge double %a, %b %2 = zext i1 %1 to i32 @@ -633,10 +681,12 @@ define i32 @ult_f64(double %a, double %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ult.d $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ult.d $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ult double %a, %b %2 = zext i1 %1 to i32 @@ -657,10 +707,12 @@ define i32 @ule_f64(double %a, double %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.ule.d $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.ule.d $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp ule double %a, %b %2 = zext i1 %1 to i32 @@ -682,11 +734,13 @@ define i32 @une_f64(double %a, double %b) nounwind { ; 32-CMP-DAG: cmp.eq.d $[[T0:f[0-9]+]], $f12, $f14 ; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 32-CMP-DAG: not $2, $[[T1]] +; 32-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 32-CMP-DAG: andi $2, $[[T2]], 1 ; 64-CMP-DAG: cmp.eq.d $[[T0:f[0-9]+]], $f12, $f13 ; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] -; 64-CMP-DAG: not $2, $[[T1]] +; 64-CMP-DAG: not $[[T2:[0-9]+]], $[[T1]] +; 64-CMP-DAG: andi $2, $[[T2]], 1 %1 = fcmp une double %a, %b %2 = zext i1 %1 
to i32 @@ -707,10 +761,12 @@ define i32 @uno_f64(double %a, double %b) nounwind { ; 64-C-DAG: movt $[[T0]], $1, $fcc0 ; 32-CMP-DAG: cmp.un.d $[[T0:f[0-9]+]], $f12, $f14 -; 32-CMP-DAG: mfc1 $2, $[[T0]] +; 32-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 32-CMP-DAG: andi $2, $[[T1]], 1 ; 64-CMP-DAG: cmp.un.d $[[T0:f[0-9]+]], $f12, $f13 -; 64-CMP-DAG: mfc1 $2, $[[T0]] +; 64-CMP-DAG: mfc1 $[[T1:[0-9]+]], $[[T0]] +; 64-CMP-DAG: andi $2, $[[T1]], 1 %1 = fcmp uno double %a, %b %2 = zext i1 %1 to i32 diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll index 91471a2ce6bb..eb2198b36dff 100644 --- a/test/CodeGen/Mips/select.ll +++ b/test/CodeGen/Mips/select.ll @@ -516,9 +516,8 @@ entry: ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R6: cmp.eq.s $[[CC:f[0-9]+]], $[[F2]], $[[F3]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 32R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 32R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 32R6: or $2, $[[NE]], $[[EQ]] @@ -532,9 +531,8 @@ entry: ; 64R6: cmp.eq.s $[[CC:f[0-9]+]], $f14, $f15 ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 64R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 64R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 64R6: or $2, $[[NE]], $[[EQ]] @@ -563,9 +561,8 @@ entry: ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R6: cmp.lt.s $[[CC:f[0-9]+]], $[[F2]], $[[F3]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 32R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 32R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 32R6: or $2, $[[NE]], $[[EQ]] @@ -579,9 +576,8 @@ entry: ; 64R6: cmp.lt.s $[[CC:f[0-9]+]], $f14, $f15 ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 64R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 64R6: seleqz $[[EQ:[0-9]+]], $5, 
$[[CCGPR]] -; FIXME: This move is redundant -; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 64R6: or $2, $[[NE]], $[[EQ]] %cmp = fcmp olt float %f2, %f3 @@ -609,9 +605,8 @@ entry: ; 32R6-DAG: mtc1 $7, $[[F3:f[0-9]+]] ; 32R6: cmp.lt.s $[[CC:f[0-9]+]], $[[F3]], $[[F2]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 32R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 32R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 32R6: or $2, $[[NE]], $[[EQ]] @@ -625,9 +620,8 @@ entry: ; 64R6: cmp.lt.s $[[CC:f[0-9]+]], $f15, $f14 ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 64R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 64R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 64R6: or $2, $[[NE]], $[[EQ]] @@ -668,9 +662,8 @@ entry: ; 32R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) ; 32R6: cmp.eq.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 32R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 32R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 32R6: or $2, $[[NE]], $[[EQ]] @@ -702,9 +695,8 @@ entry: ; 64R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) ; 64R6: cmp.eq.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]] ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 64R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 64R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 64R6: or $2, $[[NE]], $[[EQ]] @@ -747,9 +739,8 @@ entry: ; 32R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) ; 32R6: cmp.lt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 32R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 32R6: seleqz $[[EQ:[0-9]+]], $5, 
$[[CCGPR]] -; FIXME: This move is redundant -; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 32R6: or $2, $[[NE]], $[[EQ]] @@ -781,9 +772,8 @@ entry: ; 64R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) ; 64R6: cmp.lt.d $[[CC:f[0-9]+]], $[[TMP]], $[[TMP1]] ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 64R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 64R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 64R6: or $2, $[[NE]], $[[EQ]] @@ -826,9 +816,8 @@ entry: ; 32R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) ; 32R6: cmp.lt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]] ; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 32R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 32R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 32R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 32R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 32R6: or $2, $[[NE]], $[[EQ]] @@ -860,9 +849,8 @@ entry: ; 64R6-DAG: ldc1 $[[TMP1:f[0-9]+]], 0($[[D3]]) ; 64R6: cmp.lt.d $[[CC:f[0-9]+]], $[[TMP1]], $[[TMP]] ; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] +; 64R6: andi $[[CCGPR]], $[[CCGPR]], 1 ; 64R6: seleqz $[[EQ:[0-9]+]], $5, $[[CCGPR]] -; FIXME: This move is redundant -; 64R6: mfc1 $[[CCGPR:[0-9]+]], $[[CC]] ; 64R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]] ; 64R6: or $2, $[[NE]], $[[EQ]] From b350ec7ec6815c180f5de679f0a694a930df7af2 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 10 Jul 2014 10:45:11 +0000 Subject: [PATCH 0734/1155] [SystemZ] Fix FPR dwarf numbering The dwarf FPR numbers are supposed to have the order F0, F2, F4, F6, F1, F3, F5, F7, F8, etc., which matches the pairing of registers for long doubles. E.g. a long double stored in F0 is paired with F2. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212701 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZRegisterInfo.td | 25 ++++++++++- test/DebugInfo/SystemZ/eh_frame.s | 51 +++++++++++++++++------ 2 files changed, 62 insertions(+), 14 deletions(-) diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 93d7c8375b3d..47ac20dae78a 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -119,6 +119,29 @@ defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>; // Floating-point registers //===----------------------------------------------------------------------===// +// Maps FPR register numbers to their DWARF encoding. +class DwarfMapping { int Id = id; } + +def F0Dwarf : DwarfMapping<16>; +def F2Dwarf : DwarfMapping<17>; +def F4Dwarf : DwarfMapping<18>; +def F6Dwarf : DwarfMapping<19>; + +def F1Dwarf : DwarfMapping<20>; +def F3Dwarf : DwarfMapping<21>; +def F5Dwarf : DwarfMapping<22>; +def F7Dwarf : DwarfMapping<23>; + +def F8Dwarf : DwarfMapping<24>; +def F10Dwarf : DwarfMapping<25>; +def F12Dwarf : DwarfMapping<26>; +def F14Dwarf : DwarfMapping<27>; + +def F9Dwarf : DwarfMapping<28>; +def F11Dwarf : DwarfMapping<29>; +def F13Dwarf : DwarfMapping<30>; +def F15Dwarf : DwarfMapping<31>; + // Lower 32 bits of one of the 16 64-bit floating-point registers class FPR32 num, string n> : SystemZReg { let HWEncoding = num; @@ -142,7 +165,7 @@ class FPR128 num, string n, FPR64 low, FPR64 high> foreach I = 0-15 in { def F#I#S : FPR32; def F#I#D : FPR64("F"#I#"S")>, - DwarfRegNum<[!add(I, 16)]>; + DwarfRegNum<[!cast("F"#I#"Dwarf").Id]>; } foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { diff --git a/test/DebugInfo/SystemZ/eh_frame.s b/test/DebugInfo/SystemZ/eh_frame.s index d55b6cdea8c3..6189b9019673 100644 --- a/test/DebugInfo/SystemZ/eh_frame.s +++ b/test/DebugInfo/SystemZ/eh_frame.s @@ -11,9 +11,25 @@ check_largest_class: 
.cfi_offset %r13, -56 .cfi_offset %r14, -48 .cfi_offset %r15, -40 - aghi %r15, -160 - .cfi_def_cfa_offset 320 - lmg %r13, %r15, 264(%r15) + aghi %r15, -224 + .cfi_def_cfa_offset 384 + std %f8, 160(%r15) + std %f9, 168(%r15) + std %f10, 176(%r15) + std %f11, 184(%r15) + std %f12, 192(%r15) + std %f13, 200(%r15) + std %f14, 208(%r15) + std %f15, 216(%r15) + .cfi_offset %f8, -224 + .cfi_offset %f9, -216 + .cfi_offset %f10, -208 + .cfi_offset %f11, -200 + .cfi_offset %f12, -192 + .cfi_offset %f13, -184 + .cfi_offset %f14, -176 + .cfi_offset %f15, -168 + lmg %r13, %r15, 328(%r15) br %r14 .size check_largest_class, .-check_largest_class .cfi_endproc @@ -22,7 +38,7 @@ check_largest_class: # # Contents of the .eh_frame section: # -# 00000000 0000001c 00000000 CIE +# 00000000 0000000000000014 00000000 CIE # Version: 3 # Augmentation: "zR" # Code alignment factor: 1 @@ -35,20 +51,29 @@ check_largest_class: # DW_CFA_nop # DW_CFA_nop # -# 00000020 0000001c 00000024 FDE cie=00000000 pc=00000000..00000012 -# DW_CFA_advance_loc: 6 to 00000006 +# 000000.. 000000000000002c 0000001c FDE cie=00000000 pc=0000000000000000..0000000000000032 +# DW_CFA_advance_loc: 6 to 0000000000000006 # DW_CFA_offset: r13 at cfa-56 # DW_CFA_offset: r14 at cfa-48 # DW_CFA_offset: r15 at cfa-40 -# DW_CFA_advance_loc: 4 to 0000000a -# DW_CFA_def_cfa_offset: 320 -# DW_CFA_nop +# DW_CFA_advance_loc: 4 to 000000000000000a +# DW_CFA_def_cfa_offset: 384 +# DW_CFA_advance_loc: 32 to 000000000000002a +# DW_CFA_offset: r24 at cfa-224 +# DW_CFA_offset: r28 at cfa-216 +# DW_CFA_offset: r25 at cfa-208 +# DW_CFA_offset: r29 at cfa-200 +# DW_CFA_offset: r26 at cfa-192 +# DW_CFA_offset: r30 at cfa-184 +# DW_CFA_offset: r27 at cfa-176 +# DW_CFA_offset: r31 at cfa-168 # DW_CFA_nop # DW_CFA_nop # DW_CFA_nop # # CHECK: Contents of section .eh_frame: -# CHECK-NEXT: 0000 00000014 00000000 037a5200 01780e01 .........zR..x.. -# CHECK-NEXT: 0010 1b0c0fa0 01000000 0000001c 0000001c ................ 
-# CHECK-NEXT: 0020 00000000 00000012 00468d07 8e068f05 .........F...... -# CHECK-NEXT: 0030 440ec002 00000000 D....... +# CHECK-NEXT: 0000 00000014 00000000 037a5200 01780e01 {{.*}} +# CHECK-NEXT: 0010 1b0c0fa0 01000000 0000002c 0000001c {{.*}} +# CHECK-NEXT: 0020 00000000 00000032 00468d07 8e068f05 {{.*}} +# CHECK-NEXT: 0030 440e8003 60981c9c 1b991a9d 199a189e {{.*}} +# CHECK-NEXT: 0040 179b169f 15000000 {{.*}} From beefa3ada0e8eb8f9c655c75de28d98edd0d6bcb Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 10 Jul 2014 10:52:51 +0000 Subject: [PATCH 0735/1155] [SystemZ] Avoid using i8 constants for immediate fields Immediate fields that have no natural MVT type tended to use i8 if the field was small enough. This was a bit confusing since i8 isn't a legal type for the target. Fields for short immediates in a 32-bit or 64-bit operation use i32 or i64 instead, so it would be better to do the same for all fields. No behavioral change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212702 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFormats.td | 39 ++++++++++++----------- lib/Target/SystemZ/SystemZInstrInfo.td | 24 +++++++------- lib/Target/SystemZ/SystemZOperands.td | 29 +++++++---------- lib/Target/SystemZ/SystemZOperators.td | 12 +++---- lib/Target/SystemZ/SystemZPatterns.td | 8 ++--- 5 files changed, 54 insertions(+), 58 deletions(-) diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index add675a22ccc..a1ae802ce150 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -691,7 +691,7 @@ class CondStoreRSY opcode, class AsmCondStoreRSY opcode, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdaddr20only> - : InstRSY, Requires<[FeatureLoadStoreOnCond]> { let mayStore = 1; @@ -730,7 +730,7 @@ class UnaryRRE opcode, SDPatternOperator operator, class UnaryRRF opcode, RegisterOperand cls1, RegisterOperand 
cls2> - : InstRRF { let OpKey = mnemonic ## cls1; let OpType = "reg"; @@ -739,7 +739,7 @@ class UnaryRRF opcode, RegisterOperand cls1, class UnaryRRF4 opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF; // These instructions are generated by if conversion. The old value of R1 @@ -757,7 +757,7 @@ class CondUnaryRRF opcode, RegisterOperand cls1, // mask is the third operand rather than being part of the mnemonic. class AsmCondUnaryRRF opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF, Requires<[FeatureLoadStoreOnCond]> { let Constraints = "$R1 = $R1src"; @@ -823,7 +823,7 @@ class CondUnaryRSY opcode, class AsmCondUnaryRSY opcode, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdaddr20only> - : InstRSY, Requires<[FeatureLoadStoreOnCond]> { let mayLoad = 1; @@ -1315,22 +1315,23 @@ multiclass CmpSwapRSPair rsOpcode, bits<16> rsyOpcode, class RotateSelectRIEf opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRIEf { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class PrefetchRXY opcode, SDPatternOperator operator> - : InstRXY; + [(operator imm32zx4:$R1, bdxaddr20only:$XBD2)]>; class PrefetchRILPC opcode, SDPatternOperator operator> - : InstRIL { + [(operator imm32zx4:$R1, pcrel32:$I2)]> { // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more // complex. @@ -1450,7 +1451,8 @@ class StoreRXYPseudo : Pseudo<(outs cls1:$R1), - (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx6:$I5), []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; @@ -1460,9 +1462,9 @@ class RotateSelectRIEfPseudo // the value of the PSW's 2-bit condition code field. 
class SelectWrapper : Pseudo<(outs cls:$dst), - (ins cls:$src1, cls:$src2, uimm8zx4:$valid, uimm8zx4:$cc), + (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, - uimm8zx4:$valid, uimm8zx4:$cc))]> { + imm32zx4:$valid, imm32zx4:$cc))]> { let usesCustomInserter = 1; // Although the instructions used by these nodes do not in themselves // change CC, the insertion requires new blocks, and CC cannot be live @@ -1476,14 +1478,14 @@ multiclass CondStores { let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { def "" : Pseudo<(outs), - (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc), + (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), [(store (z_select_ccmask cls:$new, (load mode:$addr), - uimm8zx4:$valid, uimm8zx4:$cc), + imm32zx4:$valid, imm32zx4:$cc), mode:$addr)]>; def Inv : Pseudo<(outs), - (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc), + (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), [(store (z_select_ccmask (load mode:$addr), cls:$new, - uimm8zx4:$valid, uimm8zx4:$cc), + imm32zx4:$valid, imm32zx4:$cc), mode:$addr)]>; } } @@ -1611,6 +1613,7 @@ class CompareAliasRI : Alias<6, (outs cls1:$R1), - (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> { + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx6:$I5), []> { let Constraints = "$R1 = $R1src"; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index e70df92ffe81..3639c1abdb49 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -63,11 +63,11 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in { def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1, brtarget32:$I2), "jg$R1\t$I2", []>; } - def AsmBRC : InstRI<0xA74, (outs), (ins uimm8zx4:$R1, brtarget16:$I2), + def AsmBRC : InstRI<0xA74, (outs), (ins imm32zx4:$R1, brtarget16:$I2), "brc\t$R1, $I2", []>; - def AsmBRCL : 
InstRIL<0xC04, (outs), (ins uimm8zx4:$R1, brtarget32:$I2), + def AsmBRCL : InstRIL<0xC04, (outs), (ins imm32zx4:$R1, brtarget32:$I2), "brcl\t$R1, $I2", []>; - def AsmBCR : InstRR<0x07, (outs), (ins uimm8zx4:$R1, GR64:$R2), + def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2), "bcr\t$R1, $R2", []>; } @@ -109,7 +109,7 @@ multiclass CompareBranches { } let isCodeGenOnly = 1 in defm C : CompareBranches; -defm AsmC : CompareBranches; +defm AsmC : CompareBranches; // Define AsmParser mnemonics for each general condition-code mask // (integer or floating-point) @@ -855,7 +855,7 @@ let Defs = [CC] in { } // AND to memory - defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; + defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, imm32zx8>; // Block AND. let mayLoad = 1, mayStore = 1 in @@ -912,7 +912,7 @@ let Defs = [CC] in { } // OR to memory - defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; + defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, imm32zx8>; // Block OR. let mayLoad = 1, mayStore = 1 in @@ -952,7 +952,7 @@ let Defs = [CC] in { } // XOR to memory - defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; + defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, imm32zx8>; // Block XOR. let mayLoad = 1, mayStore = 1 in @@ -1403,15 +1403,15 @@ def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), // Optimize sign-extended 1/0 selects to -1/0 selects. This is important // for vector legalization. 
-def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, uimm8zx4:$cc)), +def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, imm32zx4:$cc)), (i32 31)), (i32 31)), - (Select32 (LHI -1), (LHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>; -def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, - uimm8zx4:$cc)))), + (Select32 (LHI -1), (LHI 0), imm32zx4:$valid, imm32zx4:$cc)>; +def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, + imm32zx4:$cc)))), (i32 63)), (i32 63)), - (Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>; + (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>; // Peepholes for turning scalar operations into block operations. defm : BlockLoadStore; def S32Imm : ImmediateAsmOperand<"S32Imm">; def U32Imm : ImmediateAsmOperand<"U32Imm">; -//===----------------------------------------------------------------------===// -// 8-bit immediates -//===----------------------------------------------------------------------===// - -def uimm8zx4 : Immediate(N->getZExtValue()); -}], NOOP_SDNodeXForm, "U4Imm">; - -def uimm8zx6 : Immediate(N->getZExtValue()); -}], NOOP_SDNodeXForm, "U6Imm">; - -def simm8 : Immediate; -def uimm8 : Immediate; - //===----------------------------------------------------------------------===// // i32 immediates //===----------------------------------------------------------------------===// @@ -241,6 +226,14 @@ def imm32lh16c : Immediate; // Short immediates +def imm32zx4 : Immediate(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U4Imm">; + +def imm32zx6 : Immediate(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U6Imm">; + def imm32sx8 : Immediate(N->getSExtValue()); }], SIMM8, "S8Imm">; @@ -470,13 +463,13 @@ def AccessReg : AsmOperandClass { let Name = "AccessReg"; let ParserMethod = "parseAccessReg"; } -def access_reg : ImmediategetZExtValue() < 16; }], +def access_reg : ImmediategetZExtValue() < 16; }], NOOP_SDNodeXForm, "AccessReg"> { let ParserMatchClass = 
AccessReg; } // A 4-bit condition-code mask. -def cond4 : PatLeaf<(i8 imm), [{ return (N->getZExtValue() < 16); }]>, - Operand { +def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>, + Operand { let PrintMethod = "printCond4Operand"; } diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index a3919618f8f2..c70e662db427 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -19,14 +19,14 @@ def SDT_ZICmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def SDT_ZBRCCMask : SDTypeProfile<0, 3, - [SDTCisVT<0, i8>, - SDTCisVT<1, i8>, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, SDTCisVT<2, OtherVT>]>; def SDT_ZSelectCCMask : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, - SDTCisVT<3, i8>, - SDTCisVT<4, i8>]>; + SDTCisVT<3, i32>, + SDTCisVT<4, i32>]>; def SDT_ZWrapPtr : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -37,7 +37,7 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2, def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; def SDT_ZExtractAccess : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i8>]>; + SDTCisVT<1, i32>]>; def SDT_ZGR128Binary32 : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisVT<1, untyped>, @@ -77,7 +77,7 @@ def SDT_ZString : SDTypeProfile<1, 3, SDTCisVT<3, i32>]>; def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; def SDT_ZPrefetch : SDTypeProfile<0, 2, - [SDTCisVT<0, i8>, + [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index c0f94ecbe2c9..e307f8a888ee 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -101,15 +101,15 @@ multiclass CondStores64 { def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr), - uimm8zx4:$valid, uimm8zx4:$cc), + imm32zx4:$valid, 
imm32zx4:$cc), mode:$addr), (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, - uimm8zx4:$valid, uimm8zx4:$cc)>; + imm32zx4:$valid, imm32zx4:$cc)>; def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new, - uimm8zx4:$valid, uimm8zx4:$cc), + imm32zx4:$valid, imm32zx4:$cc), mode:$addr), (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, - uimm8zx4:$valid, uimm8zx4:$cc)>; + imm32zx4:$valid, imm32zx4:$cc)>; } // Try to use MVC instruction INSN for a load of type LOAD followed by a store From 9aa8beb9d6b6324c8b79a5fb6df30ac822c4c991 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 10 Jul 2014 11:00:55 +0000 Subject: [PATCH 0736/1155] [SystemZ] Add MC support for LEDBRA, LEXBRA and LDXBRA These instructions aren't used for codegen since the original L*DB instructions are suitable for fround. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212703 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFP.td | 7 ++++ test/MC/Disassembler/SystemZ/insns.txt | 54 ++++++++++++++++++++++++++ test/MC/SystemZ/insn-bad-z196.s | 54 ++++++++++++++++++++++++++ test/MC/SystemZ/insn-bad.s | 15 +++++++ test/MC/SystemZ/insn-good-z196.s | 42 ++++++++++++++++++++ 5 files changed, 172 insertions(+) diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index a1e782cdfd77..e8841e131324 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -133,6 +133,13 @@ def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>; def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>; def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>; +def LEDBRA : UnaryRRF4<"ledbra", 0xB344, FP32, FP64>, + Requires<[FeatureFPExtension]>; +def LEXBRA : UnaryRRF4<"lexbra", 0xB346, FP128, FP128>, + Requires<[FeatureFPExtension]>; +def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, FP128, FP128>, + Requires<[FeatureFPExtension]>; + def : Pat<(f32 (fround FP128:$src)), (EXTRACT_SUBREG 
(LEXBR FP128:$src), subreg_hh32)>; def : Pat<(f64 (fround FP128:$src)), diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt index 1a5634d0ab38..54a3c5b1d6a1 100644 --- a/test/MC/Disassembler/SystemZ/insns.txt +++ b/test/MC/Disassembler/SystemZ/insns.txt @@ -3355,6 +3355,24 @@ # CHECK: ldxbr %f13, %f13 0xb3 0x45 0x00 0xdd +# CHECK: ldxbra %f0, 0, %f0, 1 +0xb3 0x45 0x01 0x00 + +# CHECK: ldxbra %f0, 0, %f0, 15 +0xb3 0x45 0x0f 0x00 + +# CHECK: ldxbra %f0, 0, %f13, 1 +0xb3 0x45 0x01 0x0d + +# CHECK: ldxbra %f0, 15, %f0, 1 +0xb3 0x45 0xf1 0x00 + +# CHECK: ldxbra %f4, 5, %f8, 9 +0xb3 0x45 0x59 0x48 + +# CHECK: ldxbra %f13, 0, %f0, 1 +0xb3 0x45 0x01 0xd0 + # CHECK: ldy %f0, -524288 0xed 0x00 0x00 0x00 0x80 0x65 @@ -3400,6 +3418,24 @@ # CHECK: ledbr %f15, %f15 0xb3 0x44 0x00 0xff +# CHECK: ledbra %f0, 0, %f0, 1 +0xb3 0x44 0x01 0x00 + +# CHECK: ledbra %f0, 0, %f0, 15 +0xb3 0x44 0x0f 0x00 + +# CHECK: ledbra %f0, 0, %f15, 1 +0xb3 0x44 0x01 0x0f + +# CHECK: ledbra %f0, 15, %f0, 1 +0xb3 0x44 0xf1 0x00 + +# CHECK: ledbra %f4, 5, %f6, 7 +0xb3 0x44 0x57 0x46 + +# CHECK: ledbra %f15, 0, %f0, 1 +0xb3 0x44 0x01 0xf0 + # CHECK: ler %f0, %f9 0x38 0x09 @@ -3448,6 +3484,24 @@ # CHECK: lexbr %f13, %f13 0xb3 0x46 0x00 0xdd +# CHECK: lexbra %f0, 0, %f0, 1 +0xb3 0x46 0x01 0x00 + +# CHECK: lexbra %f0, 0, %f0, 15 +0xb3 0x46 0x0f 0x00 + +# CHECK: lexbra %f0, 0, %f13, 1 +0xb3 0x46 0x01 0x0d + +# CHECK: lexbra %f0, 15, %f0, 1 +0xb3 0x46 0xf1 0x00 + +# CHECK: lexbra %f4, 5, %f8, 9 +0xb3 0x46 0x59 0x48 + +# CHECK: lexbra %f13, 0, %f0, 1 +0xb3 0x46 0x01 0xd0 + # CHECK: ley %f0, -524288 0xed 0x00 0x00 0x00 0x80 0x64 diff --git a/test/MC/SystemZ/insn-bad-z196.s b/test/MC/SystemZ/insn-bad-z196.s index da23a4b039cf..47dbe08b2525 100644 --- a/test/MC/SystemZ/insn-bad-z196.s +++ b/test/MC/SystemZ/insn-bad-z196.s @@ -410,6 +410,60 @@ lbh %r0, -524289 lbh %r0, 524288 +#CHECK: error: invalid operand +#CHECK: ldxbra %f0, 0, %f0, -1 +#CHECK: error: invalid operand 
+#CHECK: ldxbra %f0, 0, %f0, 16 +#CHECK: error: invalid operand +#CHECK: ldxbra %f0, -1, %f0, 0 +#CHECK: error: invalid operand +#CHECK: ldxbra %f0, 16, %f0, 0 +#CHECK: error: invalid register pair +#CHECK: ldxbra %f0, 0, %f2, 0 +#CHECK: error: invalid register pair +#CHECK: ldxbra %f2, 0, %f0, 0 + + ldxbra %f0, 0, %f0, -1 + ldxbra %f0, 0, %f0, 16 + ldxbra %f0, -1, %f0, 0 + ldxbra %f0, 16, %f0, 0 + ldxbra %f0, 0, %f2, 0 + ldxbra %f2, 0, %f0, 0 + +#CHECK: error: invalid operand +#CHECK: ledbra %f0, 0, %f0, -1 +#CHECK: error: invalid operand +#CHECK: ledbra %f0, 0, %f0, 16 +#CHECK: error: invalid operand +#CHECK: ledbra %f0, -1, %f0, 0 +#CHECK: error: invalid operand +#CHECK: ledbra %f0, 16, %f0, 0 + + ledbra %f0, 0, %f0, -1 + ledbra %f0, 0, %f0, 16 + ledbra %f0, -1, %f0, 0 + ledbra %f0, 16, %f0, 0 + +#CHECK: error: invalid operand +#CHECK: lexbra %f0, 0, %f0, -1 +#CHECK: error: invalid operand +#CHECK: lexbra %f0, 0, %f0, 16 +#CHECK: error: invalid operand +#CHECK: lexbra %f0, -1, %f0, 0 +#CHECK: error: invalid operand +#CHECK: lexbra %f0, 16, %f0, 0 +#CHECK: error: invalid register pair +#CHECK: lexbra %f0, 0, %f2, 0 +#CHECK: error: invalid register pair +#CHECK: lexbra %f2, 0, %f0, 0 + + lexbra %f0, 0, %f0, -1 + lexbra %f0, 0, %f0, 16 + lexbra %f0, -1, %f0, 0 + lexbra %f0, 16, %f0, 0 + lexbra %f0, 0, %f2, 0 + lexbra %f2, 0, %f0, 0 + #CHECK: error: invalid operand #CHECK: lfh %r0, -524289 #CHECK: error: invalid operand diff --git a/test/MC/SystemZ/insn-bad.s b/test/MC/SystemZ/insn-bad.s index 8004168eecac..a08cb34da832 100644 --- a/test/MC/SystemZ/insn-bad.s +++ b/test/MC/SystemZ/insn-bad.s @@ -1560,6 +1560,11 @@ ldxbr %f0, %f2 ldxbr %f2, %f0 +#CHECK: error: {{(instruction requires: fp-extension)?}} +#CHECK: ldxbra %f0, 0, %f0, 0 + + ldxbra %f0, 0, %f0, 0 + #CHECK: error: invalid operand #CHECK: ldy %f0, -524289 #CHECK: error: invalid operand @@ -1576,6 +1581,11 @@ le %f0, -1 le %f0, 4096 +#CHECK: error: {{(instruction requires: fp-extension)?}} +#CHECK: ledbra 
%f0, 0, %f0, 0 + + ledbra %f0, 0, %f0, 0 + #CHECK: error: invalid register pair #CHECK: lexbr %f0, %f2 #CHECK: error: invalid register pair @@ -1584,6 +1594,11 @@ lexbr %f0, %f2 lexbr %f2, %f0 +#CHECK: error: {{(instruction requires: fp-extension)?}} +#CHECK: lexbra %f0, 0, %f0, 0 + + lexbra %f0, 0, %f0, 0 + #CHECK: error: invalid operand #CHECK: ley %f0, -524289 #CHECK: error: invalid operand diff --git a/test/MC/SystemZ/insn-good-z196.s b/test/MC/SystemZ/insn-good-z196.s index 834bdad2592f..db5ecdd238ca 100644 --- a/test/MC/SystemZ/insn-good-z196.s +++ b/test/MC/SystemZ/insn-good-z196.s @@ -675,6 +675,48 @@ lbh %r0, 524287(%r15,%r1) lbh %r15, 0 +#CHECK: ldxbra %f0, 0, %f0, 0 # encoding: [0xb3,0x45,0x00,0x00] +#CHECK: ldxbra %f0, 0, %f0, 15 # encoding: [0xb3,0x45,0x0f,0x00] +#CHECK: ldxbra %f0, 0, %f13, 0 # encoding: [0xb3,0x45,0x00,0x0d] +#CHECK: ldxbra %f0, 15, %f0, 0 # encoding: [0xb3,0x45,0xf0,0x00] +#CHECK: ldxbra %f4, 5, %f8, 9 # encoding: [0xb3,0x45,0x59,0x48] +#CHECK: ldxbra %f13, 0, %f0, 0 # encoding: [0xb3,0x45,0x00,0xd0] + + ldxbra %f0, 0, %f0, 0 + ldxbra %f0, 0, %f0, 15 + ldxbra %f0, 0, %f13, 0 + ldxbra %f0, 15, %f0, 0 + ldxbra %f4, 5, %f8, 9 + ldxbra %f13, 0, %f0, 0 + +#CHECK: ledbra %f0, 0, %f0, 0 # encoding: [0xb3,0x44,0x00,0x00] +#CHECK: ledbra %f0, 0, %f0, 15 # encoding: [0xb3,0x44,0x0f,0x00] +#CHECK: ledbra %f0, 0, %f15, 0 # encoding: [0xb3,0x44,0x00,0x0f] +#CHECK: ledbra %f0, 15, %f0, 0 # encoding: [0xb3,0x44,0xf0,0x00] +#CHECK: ledbra %f4, 5, %f6, 7 # encoding: [0xb3,0x44,0x57,0x46] +#CHECK: ledbra %f15, 0, %f0, 0 # encoding: [0xb3,0x44,0x00,0xf0] + + ledbra %f0, 0, %f0, 0 + ledbra %f0, 0, %f0, 15 + ledbra %f0, 0, %f15, 0 + ledbra %f0, 15, %f0, 0 + ledbra %f4, 5, %f6, 7 + ledbra %f15, 0, %f0, 0 + +#CHECK: lexbra %f0, 0, %f0, 0 # encoding: [0xb3,0x46,0x00,0x00] +#CHECK: lexbra %f0, 0, %f0, 15 # encoding: [0xb3,0x46,0x0f,0x00] +#CHECK: lexbra %f0, 0, %f13, 0 # encoding: [0xb3,0x46,0x00,0x0d] +#CHECK: lexbra %f0, 15, %f0, 0 # encoding: 
[0xb3,0x46,0xf0,0x00] +#CHECK: lexbra %f4, 5, %f8, 9 # encoding: [0xb3,0x46,0x59,0x48] +#CHECK: lexbra %f13, 0, %f0, 0 # encoding: [0xb3,0x46,0x00,0xd0] + + lexbra %f0, 0, %f0, 0 + lexbra %f0, 0, %f0, 15 + lexbra %f0, 0, %f13, 0 + lexbra %f0, 15, %f0, 0 + lexbra %f4, 5, %f8, 9 + lexbra %f13, 0, %f0, 0 + #CHECK: lfh %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xca] #CHECK: lfh %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xca] #CHECK: lfh %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xca] From a4e7f05a28d3454d63c5e3217b9ce6b72c741b82 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 10 Jul 2014 11:09:29 +0000 Subject: [PATCH 0737/1155] [x86] Add another combine that is particularly useful for the new vector shuffle lowering: match shuffle patterns equivalent to an unpcklwd or unpckhwd instruction. This allows us to use generic lowering code for v8i16 shuffles and match the unpack pattern late. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212705 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 41 ++++++++++++++++++++++ test/CodeGen/X86/vector-shuffle-128-v16.ll | 14 ++++++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 245cb414ca97..5ccff20e2943 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18694,6 +18694,47 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V); } + // Look for shuffle patterns which can be implemented as a single unpack. + // FIXME: This doesn't handle the location of the PSHUFD generically, and + // only works when we have a PSHUFD followed by two half-shuffles. 
+ if (Mask[0] == Mask[1] && Mask[2] == Mask[3] && + (V.getOpcode() == X86ISD::PSHUFLW || + V.getOpcode() == X86ISD::PSHUFHW) && + V.getOpcode() != N.getOpcode() && + V.hasOneUse()) { + SDValue D = V.getOperand(0); + while (D.getOpcode() == ISD::BITCAST && D.hasOneUse()) + D = D.getOperand(0); + if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { + SmallVector VMask = getPSHUFShuffleMask(V); + SmallVector DMask = getPSHUFShuffleMask(D); + int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; + int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; + int WordMask[8]; + for (int i = 0; i < 4; ++i) { + WordMask[i + NOffset] = Mask[i] + NOffset; + WordMask[i + VOffset] = VMask[i] + VOffset; + } + // Map the word mask through the DWord mask. + int MappedMask[8]; + for (int i = 0; i < 8; ++i) + MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2; + const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3}; + const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7}; + if (std::equal(std::begin(MappedMask), std::end(MappedMask), + std::begin(UnpackLoMask)) || + std::equal(std::begin(MappedMask), std::end(MappedMask), + std::begin(UnpackHiMask))) { + // We can replace all three shuffles with an unpack. + V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0)); + DCI.AddToWorklist(V.getNode()); + return DAG.getNode(MappedMask[0] == 0 ? 
X86ISD::UNPCKL + : X86ISD::UNPCKH, + DL, MVT::v8i16, V, V); + } + } + } + break; case X86ISD::PSHUFD: diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index 71fac00021ff..4da7e42caabf 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -44,14 +44,22 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03( ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03 ; CHECK-SSE2: # BB#0: ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1] -; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,1,1,4,5,6,7] -; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,7,7] +; CHECK-SSE2-NEXT: punpcklwd %xmm0, %xmm0 ; CHECK-SSE2-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } +define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SSE2-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07 +; CHECK-SSE2: # BB#0: +; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0 +; CHECK-SSE2-NEXT: punpckhwd %xmm0, %xmm0 +; CHECK-SSE2-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle +} + define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12 ; CHECK-SSE2: # BB#0: From 446067b143a6953f5d85a71039ed4ea93e168f09 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 10 Jul 2014 11:29:23 +0000 Subject: [PATCH 0738/1155] [SystemZ] Tweak instruction format classifications There's no real need to have Shift as a separate format type from Binary. The comments for other format types were too specific and in some cases no longer accurate. Just a clean-up, no behavioral change intended. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212707 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFormats.td | 80 ++++++++++------------- lib/Target/SystemZ/SystemZInstrInfo.td | 16 ++--- 2 files changed, 43 insertions(+), 53 deletions(-) diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index a1ae802ce150..9f59a1c8e7e3 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -511,34 +511,24 @@ class InstSS op, dag outs, dag ins, string asmstr, list pattern> // to store. Other stored registers are added as implicit uses. // // Unary: -// One register output operand and one input operand. The input -// operand may be a register, immediate or memory. +// One register output operand and one input operand. // // Binary: -// One register output operand and two input operands. The first -// input operand is always a register and the second may be a register, -// immediate or memory. -// -// Shift: -// One register output operand and two input operands. The first -// input operand is a register and the second has the same form as -// an address (although it isn't actually used to address memory). +// One register output operand and two input operands. // // Compare: -// Two input operands. The first operand is always a register, -// the second may be a register, immediate or memory. +// Two input operands and an implicit CC output operand. // // Ternary: -// One register output operand and three register input operands. +// One register output operand and three input operands. // // LoadAndOp: -// One output operand and two input operands. The first input operand -// is a register and the second is an address. +// One output operand and two input operands, one of which is an address. +// The instruction both reads from and writes to the address. // // CmpSwap: -// One output operand and three input operands. 
The first two -// operands are registers and the third is an address. The instruction -// both reads from and writes to the address. +// One output operand and three input operands, one of which is an address. +// The instruction both reads from and writes to the address. // // RotateSelect: // One output operand and five input operands. The first two operands @@ -993,6 +983,33 @@ class BinaryRIL opcode, SDPatternOperator operator, let DisableEncoding = "$R1src"; } +class BinaryRS opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRS { + let R3 = 0; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRSY opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRSY; + +multiclass BinaryRSAndK opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRSY, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRS; + } +} + class BinaryRX opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> @@ -1077,33 +1094,6 @@ multiclass BinarySIPair siOpcode, } } -class ShiftRS opcode, SDPatternOperator operator, - RegisterOperand cls> - : InstRS { - let R3 = 0; - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; -} - -class ShiftRSY opcode, SDPatternOperator operator, - RegisterOperand cls> - : InstRSY; - -multiclass ShiftRSAndK opcode1, bits<16> opcode2, - SDPatternOperator operator, RegisterOperand cls> { - let NumOpsKey = mnemonic in { - let NumOpsValue = "3" in - def K : ShiftRSY, - Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in - def "" : ShiftRS; - } -} - class CompareRR opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR; // Shift left. 
let neverHasSideEffects = 1 in { - defm SLL : ShiftRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; - def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64>; + defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; + def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; } // Logical shift right. let neverHasSideEffects = 1 in { - defm SRL : ShiftRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; - def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64>; + defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; + def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; } // Arithmetic shift right. let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { - defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; - def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>; + defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; + def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>; } // Rotate left. let neverHasSideEffects = 1 in { - def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32>; - def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64>; + def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>; + def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>; } // Rotate second operand left and inserted selected bits into first operand. From 5290734b8faec7a80a4bd65844397be4d89afe21 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 10 Jul 2014 11:37:28 +0000 Subject: [PATCH 0739/1155] Revert r212640, "Add trunc (select c, a, b) -> select c (trunc a), (trunc b) combine." This caused miscompilation on, at least, x86-64. SExt(i1 cond) confused other optimizations. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212708 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 ---------- test/CodeGen/R600/select64.ll | 35 ------------------------ test/CodeGen/X86/shift-parts.ll | 8 ++---- 3 files changed, 3 insertions(+), 54 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index df273659e492..f6077b07d8d4 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6022,20 +6022,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } - // trunc (select c, a, b) -> select c, (trunc a), (trunc b) - if (N0.getOpcode() == ISD::SELECT) { - EVT SrcVT = N0.getValueType(); - if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && - TLI.isTruncateFree(SrcVT, VT)) { - SDLoc SL(N0); - SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); - SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); - EVT SetCCVT = getSetCCResultType(VT); - SDValue Cond = DAG.getSExtOrTrunc(N0.getOperand(0), SL, SetCCVT); - return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); - } - } - // Fold a series of buildvector, bitcast, and truncate if possible. 
// For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to diff --git a/test/CodeGen/R600/select64.ll b/test/CodeGen/R600/select64.ll index dba25e3bd21e..6b87d9865ad6 100644 --- a/test/CodeGen/R600/select64.ll +++ b/test/CodeGen/R600/select64.ll @@ -13,38 +13,3 @@ entry: store i64 %1, i64 addrspace(1)* %out ret void } - -; CHECK-LABEL: @select_trunc_i64 -; CHECK: V_CNDMASK_B32 -; CHECK-NOT: V_CNDMASK_B32 -define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind { - %cmp = icmp ugt i32 %cond, 5 - %sel = select i1 %cmp, i64 0, i64 %in - %trunc = trunc i64 %sel to i32 - store i32 %trunc, i32 addrspace(1)* %out, align 4 - ret void -} - -; CHECK-LABEL: @select_trunc_i64_2 -; CHECK: V_CNDMASK_B32 -; CHECK-NOT: V_CNDMASK_B32 -define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind { - %cmp = icmp ugt i32 %cond, 5 - %sel = select i1 %cmp, i64 %a, i64 %b - %trunc = trunc i64 %sel to i32 - store i32 %trunc, i32 addrspace(1)* %out, align 4 - ret void -} - -; CHECK-LABEL: @v_select_trunc_i64_2 -; CHECK: V_CNDMASK_B32 -; CHECK-NOT: V_CNDMASK_B32 -define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { - %cmp = icmp ugt i32 %cond, 5 - %a = load i64 addrspace(1)* %aptr, align 8 - %b = load i64 addrspace(1)* %bptr, align 8 - %sel = select i1 %cmp, i64 %a, i64 %b - %trunc = trunc i64 %sel to i32 - store i32 %trunc, i32 addrspace(1)* %out, align 4 - ret void -} diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll index 763da6397101..ce4f538f4de4 100644 --- a/test/CodeGen/X86/shift-parts.ll +++ b/test/CodeGen/X86/shift-parts.ll @@ -1,19 +1,17 @@ -; RUN: llc -march=x86-64 < %s | FileCheck %s +; RUN: llc < %s -march=x86-64 | grep shrdq ; PR4736 %0 = type { i32, i8, [35 x i8] } @g_144 = external global %0, align 8 ; <%0*> [#uses=1] -; CHECK: shrdq - -define i32 @int87(i32 %uint64p_8, i1 %cond) 
nounwind { +define i32 @int87(i32 %uint64p_8) nounwind { entry: %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; [#uses=1] br label %for.cond for.cond: ; preds = %for.cond, %entry - %call3.in.in.in.v = select i1 %cond, i320 192, i320 128 ; [#uses=1] + %call3.in.in.in.v = select i1 undef, i320 192, i320 128 ; [#uses=1] %call3.in.in.in = lshr i320 %srcval4, %call3.in.in.in.v ; [#uses=1] %call3.in = trunc i320 %call3.in.in.in to i32 ; [#uses=1] %tobool = icmp eq i32 %call3.in, 0 ; [#uses=1] From e93de7fb73c89dd007f547719f4ffd97c89dfc34 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 10 Jul 2014 11:37:39 +0000 Subject: [PATCH 0740/1155] llvm/test/CodeGen/X86/shift-parts.ll: FileCheck-ize. (from r212640) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212709 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/shift-parts.ll | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll index ce4f538f4de4..ddad307715a4 100644 --- a/test/CodeGen/X86/shift-parts.ll +++ b/test/CodeGen/X86/shift-parts.ll @@ -1,10 +1,12 @@ -; RUN: llc < %s -march=x86-64 | grep shrdq +; RUN: llc -march=x86-64 < %s | FileCheck %s ; PR4736 %0 = type { i32, i8, [35 x i8] } @g_144 = external global %0, align 8 ; <%0*> [#uses=1] +; CHECK: shrdq + define i32 @int87(i32 %uint64p_8) nounwind { entry: %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; [#uses=1] From 2f7fa8922ffb0bbb653c997e551a0916c1b602b7 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 10 Jul 2014 11:39:59 +0000 Subject: [PATCH 0741/1155] SpecialCaseList.h: Fix -Wdocumentation with \code. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212710 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/SpecialCaseList.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/llvm/Support/SpecialCaseList.h b/include/llvm/Support/SpecialCaseList.h index 192e15ab2211..098b9c7a17b7 100644 --- a/include/llvm/Support/SpecialCaseList.h +++ b/include/llvm/Support/SpecialCaseList.h @@ -71,7 +71,9 @@ class SpecialCaseList { ~SpecialCaseList(); /// Returns true, if special case list contains a line + /// \code /// @Section:=@Category + /// \endcode /// and @Query satisfies a wildcard expression . bool inSection(const StringRef Section, const StringRef Query, const StringRef Category = StringRef()) const; From 35dda8a53c15b8a8e6c6c6d5b82b02750c9f5cc2 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 10 Jul 2014 11:44:37 +0000 Subject: [PATCH 0742/1155] [SystemZ] Use SystemZCallingConv.td to define callee-saved registers Just a clean-up. No behavioral change intended. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212711 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZCallingConv.td | 10 ++++++++-- lib/Target/SystemZ/SystemZISelLowering.cpp | 6 ++++++ lib/Target/SystemZ/SystemZInstrInfo.td | 4 +--- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 18 +++++++----------- lib/Target/SystemZ/SystemZRegisterInfo.h | 1 + 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td index c4f641e7bdec..fb0d1d8a3fe7 100644 --- a/lib/Target/SystemZ/SystemZCallingConv.td +++ b/lib/Target/SystemZ/SystemZCallingConv.td @@ -13,7 +13,7 @@ class CCIfExtend : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>; //===----------------------------------------------------------------------===// -// SVR4 return value calling convention +// z/Linux return value calling convention //===----------------------------------------------------------------------===// def RetCC_SystemZ : CallingConv<[ // Promote i32 to i64 if it has an explicit extension type. @@ -39,7 +39,7 @@ def RetCC_SystemZ : CallingConv<[ ]>; //===----------------------------------------------------------------------===// -// SVR4 argument calling conventions +// z/Linux argument calling conventions //===----------------------------------------------------------------------===// def CC_SystemZ : CallingConv<[ // Promote i32 to i64 if it has an explicit extension type. @@ -63,3 +63,9 @@ def CC_SystemZ : CallingConv<[ // Other arguments are passed in 8-byte-aligned 8-byte stack slots. 
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> ]>; + +//===----------------------------------------------------------------------===// +// z/Linux callee-saved registers +//===----------------------------------------------------------------------===// +def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15), + (sequence "F%dD", 8, 15))>; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index f6c73c3df1ca..00c65f5bba6b 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -914,6 +914,12 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[I].first, RegsToPass[I].second.getValueType())); + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + // Glue the call to the argument copies, if any. 
if (Glue.getNode()) Ops.push_back(Glue); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 8ea63e05c90d..f4951ad8e0ac 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -233,9 +233,7 @@ defm CondStore64 : CondStores; def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops), diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 84a33d8a1288..f03bcc412d51 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -22,18 +22,14 @@ using namespace llvm; SystemZRegisterInfo::SystemZRegisterInfo() : SystemZGenRegisterInfo(SystemZ::R14D) {} -const MCPhysReg* +const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - static const MCPhysReg CalleeSavedRegs[] = { - SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D, - SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D, - SystemZ::R14D, SystemZ::R15D, - SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, - SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D, - 0 - }; - - return CalleeSavedRegs; + return CSR_SystemZ_SaveList; +} + +const uint32_t * +SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + return CSR_SystemZ_RegMask; } BitVector diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index bf061b932d38..9bffa467a15d 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -45,6 +45,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { } const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, 
unsigned FIOperandNum, From cdbdfa28d12a190b01e13dadf41e9bf8c7cdb26a Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 10 Jul 2014 12:32:32 +0000 Subject: [PATCH 0743/1155] [x86,SDAG] Introduce any- and sign-extend-vector-inreg nodes analogous to the zero-extend-vector-inreg node introduced previously for the same purpose: manage the type legalization of widened extend operations, especially to support the experimental widening mode for x86. I'm adding both because sign-extend is expanded in terms of any-extend with shifts to propagate the sign bit. This removes the last fundamental scalarization from vec_cast2.ll (a test case that hit many really bad edge cases for widening legalization), although the trunc tests in that file still appear scalarized because the the shuffle legalization is scalarizing. Funny thing, I've been working on that. Some initial experiments with this and SSE2 scenarios is showing moderately good behavior already for sign extension. Still some work to do on the shuffle combining on X86 before we're generating optimal sequences, but avoiding scalarization is a huge step forward. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212714 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ISDOpcodes.h | 22 +++++++ include/llvm/CodeGen/SelectionDAG.h | 12 ++++ lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +- .../SelectionDAG/LegalizeVectorOps.cpp | 66 +++++++++++++++++++ .../SelectionDAG/LegalizeVectorTypes.cpp | 28 +++++--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++++++ .../SelectionDAG/SelectionDAGDumper.cpp | 2 + lib/CodeGen/TargetLoweringBase.cpp | 4 ++ test/CodeGen/X86/vec_cast2.ll | 18 +++++ 9 files changed, 165 insertions(+), 9 deletions(-) diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index fb138f629aa5..a8f236805495 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -379,6 +379,28 @@ namespace ISD { /// operand, a ValueType node. 
SIGN_EXTEND_INREG, + /// ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an + /// in-register any-extension of the low lanes of an integer vector. The + /// result type must have fewer elements than the operand type, and those + /// elements must be larger integer types such that the total size of the + /// operand type and the result type match. Each of the low operand + /// elements is any-extended into the corresponding, wider result + /// elements with the high bits becoming undef. + ANY_EXTEND_VECTOR_INREG, + + /// SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an + /// in-register sign-extension of the low lanes of an integer vector. The + /// result type must have fewer elements than the operand type, and those + /// elements must be larger integer types such that the total size of the + /// operand type and the result type match. Each of the low operand + /// elements is sign-extended into the corresponding, wider result + /// elements. + // FIXME: The SIGN_EXTEND_INREG node isn't specifically limited to + // scalars, but it also doesn't handle vectors well. Either it should be + // restricted to scalars or this node (and its handling) should be merged + // into it. + SIGN_EXTEND_VECTOR_INREG, + /// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an /// in-register zero-extension of the low lanes of an integer vector. The /// result type must have fewer elements than the operand type, and those diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 0a31905a37f2..5effb82d1c97 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -562,6 +562,18 @@ class SelectionDAG { /// value assuming it was the smaller SrcTy value. SDValue getZeroExtendInReg(SDValue Op, SDLoc DL, EVT SrcTy); + /// getAnyExtendVectorInReg - Return an operation which will any-extend the + /// low lanes of the operand into the specified vector type. 
For example, + /// this can convert a v16i8 into a v4i32 by any-extending the low four + /// lanes of the operand from i8 to i32. + SDValue getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT); + + /// getSignExtendVectorInReg - Return an operation which will sign extend the + /// low lanes of the operand into the specified vector type. For example, + /// this can convert a v16i8 into a v4i32 by sign extending the low four + /// lanes of the operand from i8 to i32. + SDValue getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT); + /// getZeroExtendVectorInReg - Return an operation which will zero extend the /// low lanes of the operand into the specified vector type. For example, /// this can convert a v16i8 into a v4i32 by zero extending the low four diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e52fb3c05e39..d0ca6f870da4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -645,11 +645,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); + SDValue WidenVecOp_EXTEND(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_SETCC(SDNode* N); - SDValue WidenVecOp_ZERO_EXTEND(SDNode *N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 25dc98e0eb9b..507e7ffb1d45 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -75,6 +75,20 @@ class VectorLegalizer { /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); + /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG. 
+ /// + /// Shuffles the low lanes of the operand into place and bitcasts to the proper + /// type. The contents of the bits in the extended part of each element are + /// undef. + SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place, bitcasts to the proper + /// type, then shifts left and arithmetic shifts right to introduce a sign + /// extension. + SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); + /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and blends zeros into @@ -280,6 +294,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_EXTEND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: QueryType = Node->getValueType(0); break; @@ -621,6 +637,10 @@ SDValue VectorLegalizer::Expand(SDValue Op) { switch (Op->getOpcode()) { case ISD::SIGN_EXTEND_INREG: return ExpandSEXTINREG(Op); + case ISD::ANY_EXTEND_VECTOR_INREG: + return ExpandANY_EXTEND_VECTOR_INREG(Op); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return ExpandSIGN_EXTEND_VECTOR_INREG(Op); case ISD::ZERO_EXTEND_VECTOR_INREG: return ExpandZERO_EXTEND_VECTOR_INREG(Op); case ISD::BSWAP: @@ -717,6 +737,52 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } +// Generically expand a vector anyext in register to a shuffle of the relevant +// lanes into the appropriate locations, with other lanes left undef. +SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build a base mask of undef shuffles. 
+ SmallVector ShuffleMask; + ShuffleMask.resize(NumSrcElements, -1); + + // Place the extended lanes into the correct locations. + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = i; + + return DAG.getNode( + ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); +} + +SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // First build an any-extend node which can be legalized above when we + // recurse through it. + Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); + + // Now we need sign extend. Do this by shifting the elements. Even if these + // aren't legal operations, they have a better chance of being legalized + // without full scalarization than the sign extension does. + unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); + unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT); + return DAG.getNode(ISD::SRA, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), + ShiftAmount); +} + // Generically expand a vector zext in register to a shuffle of the relevant // lanes into the appropriate locations, a blend of zero into the high bits, // and a bitcast to the wider element type. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 415d4e3f5073..f77c592fddb7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2398,7 +2398,12 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; - case ISD::ZERO_EXTEND: Res = WidenVecOp_ZERO_EXTEND(N); break; + + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = WidenVecOp_EXTEND(N); + break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: @@ -2406,8 +2411,6 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: - case ISD::ANY_EXTEND: Res = WidenVecOp_Convert(N); break; } @@ -2428,14 +2431,14 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { return false; } -SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); SDValue InOp = N->getOperand(0); // If some legalization strategy other than widening is used on the operand, - // we can't safely assume that just zero-extending the low lanes is the - // correct transformation. + // we can't safely assume that just extending the low lanes is the correct + // transformation. if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) return WidenVecOp_Convert(N); InOp = GetWidenedVector(InOp); @@ -2476,9 +2479,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) { return WidenVecOp_Convert(N); } - // Use a special DAG node to represent the operation of zero extending the + // Use special DAG nodes to represent the operation of extending the // low lanes. 
- return DAG.getZeroExtendVectorInReg(InOp, DL, VT); + switch (N->getOpcode()) { + default: + llvm_unreachable("Extend legalization on on extend operation!"); + case ISD::ANY_EXTEND: + return DAG.getAnyExtendVectorInReg(InOp, DL, VT); + case ISD::SIGN_EXTEND: + return DAG.getSignExtendVectorInReg(InOp, DL, VT); + case ISD::ZERO_EXTEND: + return DAG.getZeroExtendVectorInReg(InOp, DL, VT); + } } SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6a3fa7aef1fc..daff1f228c71 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1033,6 +1033,26 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { getConstant(Imm, Op.getValueType())); } +SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op); +} + SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { 
assert(VT.isVector() && "This DAG node is restricted to vector types."); assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 63746e1cdc08..b3a452f2148d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -221,6 +221,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ZERO_EXTEND: return "zero_extend"; case ISD::ANY_EXTEND: return "any_extend"; case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg"; + case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg"; case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index afa58e780ffe..c574fd4854a9 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -746,6 +746,10 @@ void TargetLoweringBase::initActions() { if (VT >= MVT::FIRST_VECTOR_VALUETYPE && VT <= MVT::LAST_VECTOR_VALUETYPE) { setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, + (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, + (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, (MVT::SimpleValueType)VT, Expand); } diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll index c7f1554cdd4e..1a6c05dd9f41 100644 --- a/test/CodeGen/X86/vec_cast2.ll +++ b/test/CodeGen/X86/vec_cast2.ll @@ -4,6 +4,17 @@ ;CHECK-LABEL: foo1_8: ;CHECK: vcvtdq2ps ;CHECK: ret +; +;CHECK-WIDE-LABEL: foo1_8: +;CHECK-WIDE: vpmovzxbd %xmm0, %xmm1 +;CHECK-WIDE-NEXT: vpslld $24, %xmm1, %xmm1 +;CHECK-WIDE-NEXT: vpsrad 
$24, %xmm1, %xmm1 +;CHECK-WIDE-NEXT: vpshufb {{.*}}, %xmm0, %xmm0 +;CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0 +;CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0 +;CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +;CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0 +;CHECK-WIDE-NEXT: ret define <8 x float> @foo1_8(<8 x i8> %src) { %res = sitofp <8 x i8> %src to <8 x float> ret <8 x float> %res @@ -12,6 +23,13 @@ define <8 x float> @foo1_8(<8 x i8> %src) { ;CHECK-LABEL: foo1_4: ;CHECK: vcvtdq2ps ;CHECK: ret +; +;CHECK-WIDE-LABEL: foo1_4: +;CHECK-WIDE: vpmovzxbd %xmm0, %xmm0 +;CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0 +;CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0 +;CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0 +;CHECK-WIDE-NEXT: ret define <4 x float> @foo1_4(<4 x i8> %src) { %res = sitofp <4 x i8> %src to <4 x float> ret <4 x float> %res From a2bc40371007da34a230227b250effc27a72d7a5 Mon Sep 17 00:00:00 2001 From: Zinovy Nis Date: Thu, 10 Jul 2014 13:03:26 +0000 Subject: [PATCH 0744/1155] [x32] Add AsmBackend for X32 which uses ELF32 with x86_64 (the author is Pavel Chupin). This is minimal change for backend required to have "hello world" compiled and working on x32 target (x86_64-linux-gnux32). More patches for x32 will follow. 
Differential Revision: http://reviews.llvm.org/D4181 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212716 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index c12a57a06d66..23bca0d53d3d 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -366,6 +366,17 @@ class ELFX86_32AsmBackend : public ELFX86AsmBackend { } }; +class ELFX86_X32AsmBackend : public ELFX86AsmBackend { +public: + ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) + : ELFX86AsmBackend(T, OSABI, CPU) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, + ELF::EM_X86_64); + } +}; + class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) @@ -824,5 +835,8 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, return new WindowsX86AsmBackend(T, true, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); + + if (TheTriple.getEnvironment() == Triple::GNUX32) + return new ELFX86_X32AsmBackend(T, OSABI, CPU); return new ELFX86_64AsmBackend(T, OSABI, CPU); } From 24a071b8c5565c9bdf7f92af487dd70b3026c4f9 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Thu, 10 Jul 2014 13:38:23 +0000 Subject: [PATCH 0745/1155] [mips] Add support for -modd-spreg/-mno-odd-spreg Summary: When -mno-odd-spreg is in effect, 32-bit floating point values are not permitted in odd FPU registers. The option also prohibits 32-bit and 64-bit floating point comparison results from being written to odd registers. 
This option has three purposes: * It allows support for certain MIPS implementations such as loongson-3a that do not allow the use of odd registers for single precision arithmetic. * When using -mfpxx, -mno-odd-spreg is the default and this allows us to statically check that code is compliant with the O32 FPXX ABI since mtc1/mfc1 instructions to/from odd registers are guaranteed not to appear for any reason. Once this has been established, the user can then re-enable -modd-spreg to regain the use of all 32 single-precision registers. * When using -mfp64 and -mno-odd-spreg together, an O32 extension named O32 FP64A is used as the ABI. This is intended to provide almost all functionality of an FR=1 processor but can also be executed on a FR=0 core with the assistance of a hardware compatibility mode which emulates FR=0 behaviour on an FR=1 processor. * Added '.module oddspreg' and '.module nooddspreg' each of which update the .MIPS.abiflags section appropriately * Moved setFpABI() call inside emitDirectiveModuleFP() so that the caller doesn't have to remember to do it. * MipsABIFlags now calculates the flags1 and flags2 member on demand rather than trying to maintain them in the same format they will be emitted in. There is one portion of the -mfp64 and -mno-odd-spreg combination that is not implemented yet. Moves to/from odd-numbered double-precision registers must not use mtc1. I will fix this in a follow-up. 
Differential Revision: http://reviews.llvm.org/D4383 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212717 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 108 +++++++++++++----- .../Mips/MCTargetDesc/MipsABIFlagsSection.cpp | 52 ++++++--- .../Mips/MCTargetDesc/MipsABIFlagsSection.h | 79 +++++++++---- .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 33 ++++-- lib/Target/Mips/Mips.td | 3 + lib/Target/Mips/MipsAsmPrinter.cpp | 8 +- lib/Target/Mips/MipsRegisterInfo.cpp | 5 + lib/Target/Mips/MipsRegisterInfo.td | 6 + lib/Target/Mips/MipsSubtarget.cpp | 20 ++-- lib/Target/Mips/MipsSubtarget.h | 5 + lib/Target/Mips/MipsTargetStreamer.h | 29 +++-- test/CodeGen/Mips/no-odd-spreg.ll | 54 +++++++++ test/MC/Mips/nooddspreg-cmdarg.s | 43 +++++++ test/MC/Mips/nooddspreg-error.s | 14 +++ test/MC/Mips/nooddspreg.s | 45 ++++++++ test/MC/Mips/oddspreg.s | 56 +++++++++ 16 files changed, 462 insertions(+), 98 deletions(-) create mode 100644 test/CodeGen/Mips/no-odd-spreg.ll create mode 100644 test/MC/Mips/nooddspreg-cmdarg.s create mode 100644 test/MC/Mips/nooddspreg-error.s create mode 100644 test/MC/Mips/nooddspreg.s create mode 100644 test/MC/Mips/oddspreg.s diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 44f18a8d1cbe..0c06be8c7d99 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -165,7 +165,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseDirectiveGpDWord(); bool parseDirectiveModule(); bool parseDirectiveModuleFP(); - bool parseFpABIValue(Val_GNU_MIPS_ABI &FpABI, StringRef Directive); + bool parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI, + StringRef Directive); MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); @@ -235,6 +236,9 @@ class MipsAsmParser : public MCTargetAsmParser { ((STI.getFeatureBits() & Mips::FeatureEABI) != 0) + ((STI.getFeatureBits() & Mips::FeatureN32) != 0) + 
((STI.getFeatureBits() & Mips::FeatureN64) != 0)) == 1); + + if (!isABI_O32() && !allowOddSPReg() != 0) + report_fatal_error("-mno-odd-spreg requires the O32 ABI"); } MCAsmParser &getParser() const { return Parser; } @@ -250,6 +254,10 @@ class MipsAsmParser : public MCTargetAsmParser { bool isABI_O32() const { return STI.getFeatureBits() & Mips::FeatureO32; } bool isABI_FPXX() const { return false; } // TODO: add check for FeatureXX + bool allowOddSPReg() const { + return !(STI.getFeatureBits() & Mips::FeatureNoOddSPReg); + } + bool inMicroMipsMode() const { return STI.getFeatureBits() & Mips::FeatureMicroMips; } @@ -563,6 +571,10 @@ class MipsOperand : public MCParsedAsmOperand { void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getFGR32Reg())); + // FIXME: We ought to do this for -integrated-as without -via-file-asm too. + if (!AsmParser.allowOddSPReg() && RegIdx.Index & 1) + AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU " + "registers"); } void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const { @@ -2444,7 +2456,7 @@ bool MipsAsmParser::parseSetNoMips16Directive() { } bool MipsAsmParser::parseSetFpDirective() { - Val_GNU_MIPS_ABI FpAbiVal; + MipsABIFlagsSection::FpABIKind FpAbiVal; // Line can be: .set fp=32 // .set fp=xx // .set fp=64 @@ -2464,7 +2476,7 @@ bool MipsAsmParser::parseSetFpDirective() { reportParseError("unexpected token in statement"); return false; } - getTargetStreamer().emitDirectiveSetFp(FpAbiVal, isABI_O32()); + getTargetStreamer().emitDirectiveSetFp(FpAbiVal); Parser.Lex(); // Consume the EndOfStatement. 
return false; } @@ -2784,29 +2796,73 @@ bool MipsAsmParser::parseDirectiveOption() { return false; } +/// parseDirectiveModule +/// ::= .module oddspreg +/// ::= .module nooddspreg +/// ::= .module fp=value bool MipsAsmParser::parseDirectiveModule() { - // Line can be: .module fp=32 - // .module fp=xx - // .module fp=64 + MCAsmLexer &Lexer = getLexer(); + SMLoc L = Lexer.getLoc(); + if (!getTargetStreamer().getCanHaveModuleDir()) { // TODO : get a better message. reportParseError(".module directive must appear before any code"); return false; } - AsmToken Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Identifier) && Tok.getString() != "fp") { - reportParseError("unexpected token in .module directive, 'fp' expected"); - return false; + + if (Lexer.is(AsmToken::Identifier)) { + StringRef Option = Parser.getTok().getString(); + Parser.Lex(); + + if (Option == "oddspreg") { + getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32()); + clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("Expected end of statement"); + return false; + } + + return false; + } else if (Option == "nooddspreg") { + if (!isABI_O32()) { + Error(L, "'.module nooddspreg' requires the O32 ABI"); + return false; + } + + getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32()); + setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("Expected end of statement"); + return false; + } + + return false; + } else if (Option == "fp") { + return parseDirectiveModuleFP(); + } + + return Error(L, "'" + Twine(Option) + "' is not a valid .module option."); } - Parser.Lex(); // Eat fp token - Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Equal)) { + + return false; +} + +/// parseDirectiveModuleFP +/// ::= =32 +/// ::= =xx +/// ::= =64 +bool MipsAsmParser::parseDirectiveModuleFP() { + MCAsmLexer &Lexer = getLexer(); + + if 
(Lexer.isNot(AsmToken::Equal)) { reportParseError("unexpected token in statement"); return false; } Parser.Lex(); // Eat '=' token. - Val_GNU_MIPS_ABI FpABI; + MipsABIFlagsSection::FpABIKind FpABI; if (!parseFpABIValue(FpABI, ".module")) return false; @@ -2817,11 +2873,11 @@ bool MipsAsmParser::parseDirectiveModule() { // Emit appropriate flags. getTargetStreamer().emitDirectiveModuleFP(FpABI, isABI_O32()); - + Parser.Lex(); // Consume the EndOfStatement. return false; } -bool MipsAsmParser::parseFpABIValue(Val_GNU_MIPS_ABI &FpABI, +bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI, StringRef Directive) { MCAsmLexer &Lexer = getLexer(); @@ -2839,7 +2895,7 @@ bool MipsAsmParser::parseFpABIValue(Val_GNU_MIPS_ABI &FpABI, return false; } - FpABI = MipsABIFlagsSection::Val_GNU_MIPS_ABI_FP_XX; + FpABI = MipsABIFlagsSection::FpABIKind::XX; return true; } @@ -2858,21 +2914,11 @@ bool MipsAsmParser::parseFpABIValue(Val_GNU_MIPS_ABI &FpABI, return false; } - FpABI = MipsABIFlagsSection::Val_GNU_MIPS_ABI_FP_DOUBLE; - return true; - } else { - if (isABI_N32() || isABI_N64()) { - FpABI = MipsABIFlagsSection::Val_GNU_MIPS_ABI_FP_DOUBLE; - return true; - } + FpABI = MipsABIFlagsSection::FpABIKind::S32; + } else + FpABI = MipsABIFlagsSection::FpABIKind::S64; - if (isABI_O32()) { - FpABI = MipsABIFlagsSection::Val_GNU_MIPS_ABI_FP_64; - return true; - } - - llvm_unreachable("Unknown ABI"); - } + return true; } return false; diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp index a901af2df75d..3202b5e60d44 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp @@ -11,16 +11,30 @@ using namespace llvm; -StringRef MipsABIFlagsSection::getFpABIString(Val_GNU_MIPS_ABI Value, - bool Is32BitAbi) { +uint8_t MipsABIFlagsSection::getFpABIValue() { + switch (FpABI) { + case FpABIKind::ANY: + return Val_GNU_MIPS_ABI_FP_ANY; + case 
FpABIKind::XX: + return Val_GNU_MIPS_ABI_FP_XX; + case FpABIKind::S32: + return Val_GNU_MIPS_ABI_FP_DOUBLE; + case FpABIKind::S64: + if (Is32BitABI) + return OddSPReg ? Val_GNU_MIPS_ABI_FP_64 : Val_GNU_MIPS_ABI_FP_64A; + return Val_GNU_MIPS_ABI_FP_DOUBLE; + default: + return 0; + } +} + +StringRef MipsABIFlagsSection::getFpABIString(FpABIKind Value) { switch (Value) { - case MipsABIFlagsSection::Val_GNU_MIPS_ABI_FP_XX: + case FpABIKind::XX: return "xx"; - case MipsABIFlagsSection::Val_GNU_MIPS_ABI_FP_64: - return "64"; - case MipsABIFlagsSection::Val_GNU_MIPS_ABI_FP_DOUBLE: - if (Is32BitAbi) - return "32"; + case FpABIKind::S32: + return "32"; + case FpABIKind::S64: return "64"; default: llvm_unreachable("unsupported fp abi value"); @@ -30,17 +44,17 @@ StringRef MipsABIFlagsSection::getFpABIString(Val_GNU_MIPS_ABI Value, namespace llvm { MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) { // Write out a Elf_Internal_ABIFlags_v0 struct - OS.EmitIntValue(ABIFlagsSection.getVersion(), 2); // version - OS.EmitIntValue(ABIFlagsSection.getISALevel(), 1); // isa_level - OS.EmitIntValue(ABIFlagsSection.getISARevision(), 1); // isa_rev - OS.EmitIntValue(ABIFlagsSection.getGPRSize(), 1); // gpr_size - OS.EmitIntValue(ABIFlagsSection.getCPR1Size(), 1); // cpr1_size - OS.EmitIntValue(ABIFlagsSection.getCPR2Size(), 1); // cpr2_size - OS.EmitIntValue(ABIFlagsSection.getFpABI(), 1); // fp_abi - OS.EmitIntValue(ABIFlagsSection.getISAExtensionSet(), 4); // isa_ext - OS.EmitIntValue(ABIFlagsSection.getASESet(), 4); // ases - OS.EmitIntValue(ABIFlagsSection.getFlags1(), 4); // flags1 - OS.EmitIntValue(ABIFlagsSection.getFlags2(), 4); // flags2 + OS.EmitIntValue(ABIFlagsSection.getVersionValue(), 2); // version + OS.EmitIntValue(ABIFlagsSection.getISALevelValue(), 1); // isa_level + OS.EmitIntValue(ABIFlagsSection.getISARevisionValue(), 1); // isa_rev + OS.EmitIntValue(ABIFlagsSection.getGPRSizeValue(), 1); // gpr_size + 
OS.EmitIntValue(ABIFlagsSection.getCPR1SizeValue(), 1); // cpr1_size + OS.EmitIntValue(ABIFlagsSection.getCPR2SizeValue(), 1); // cpr2_size + OS.EmitIntValue(ABIFlagsSection.getFpABIValue(), 1); // fp_abi + OS.EmitIntValue(ABIFlagsSection.getISAExtensionSetValue(), 4); // isa_ext + OS.EmitIntValue(ABIFlagsSection.getASESetValue(), 4); // ases + OS.EmitIntValue(ABIFlagsSection.getFlags1Value(), 4); // flags1 + OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2 return OS; } } diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h index ed6df5a76174..ab18c4413efd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h +++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h @@ -69,9 +69,17 @@ struct MipsABIFlagsSection { Val_GNU_MIPS_ABI_FP_ANY = 0, Val_GNU_MIPS_ABI_FP_DOUBLE = 1, Val_GNU_MIPS_ABI_FP_XX = 5, - Val_GNU_MIPS_ABI_FP_64 = 6 + Val_GNU_MIPS_ABI_FP_64 = 6, + Val_GNU_MIPS_ABI_FP_64A = 7 }; + enum AFL_FLAGS1 { + AFL_FLAGS1_ODDSPREG = 1 + }; + + // Internal representation of the values used in .module fp=value + enum class FpABIKind { ANY, XX, S32, S64 }; + // Version of flags structure. uint16_t Version; // The level of the ISA: 1-5, 32, 64. @@ -84,31 +92,52 @@ struct MipsABIFlagsSection { AFL_REG CPR1Size; // The size of co-processor 2 registers. AFL_REG CPR2Size; - // The floating-point ABI. - Val_GNU_MIPS_ABI FpABI; // Processor-specific extension. uint32_t ISAExtensionSet; // Mask of ASEs used. uint32_t ASESet; + bool OddSPReg; + + bool Is32BitABI; + +protected: + // The floating-point ABI. 
+ FpABIKind FpABI; + +public: MipsABIFlagsSection() : Version(0), ISALevel(0), ISARevision(0), GPRSize(AFL_REG_NONE), - CPR1Size(AFL_REG_NONE), CPR2Size(AFL_REG_NONE), - FpABI(Val_GNU_MIPS_ABI_FP_ANY), ISAExtensionSet(0), ASESet(0) {} - - uint16_t getVersion() { return (uint16_t)Version; } - uint8_t getISALevel() { return (uint8_t)ISALevel; } - uint8_t getISARevision() { return (uint8_t)ISARevision; } - uint8_t getGPRSize() { return (uint8_t)GPRSize; } - uint8_t getCPR1Size() { return (uint8_t)CPR1Size; } - uint8_t getCPR2Size() { return (uint8_t)CPR2Size; } - uint8_t getFpABI() { return (uint8_t)FpABI; } - uint32_t getISAExtensionSet() { return (uint32_t)ISAExtensionSet; } - uint32_t getASESet() { return (uint32_t)ASESet; } - uint32_t getFlags1() { return 0; } - uint32_t getFlags2() { return 0; } - - StringRef getFpABIString(Val_GNU_MIPS_ABI Value, bool Is32BitAbi); + CPR1Size(AFL_REG_NONE), CPR2Size(AFL_REG_NONE), ISAExtensionSet(0), + ASESet(0), OddSPReg(false), Is32BitABI(false), FpABI(FpABIKind::ANY) {} + + uint16_t getVersionValue() { return (uint16_t)Version; } + uint8_t getISALevelValue() { return (uint8_t)ISALevel; } + uint8_t getISARevisionValue() { return (uint8_t)ISARevision; } + uint8_t getGPRSizeValue() { return (uint8_t)GPRSize; } + uint8_t getCPR1SizeValue() { return (uint8_t)CPR1Size; } + uint8_t getCPR2SizeValue() { return (uint8_t)CPR2Size; } + uint8_t getFpABIValue(); + uint32_t getISAExtensionSetValue() { return (uint32_t)ISAExtensionSet; } + uint32_t getASESetValue() { return (uint32_t)ASESet; } + + uint32_t getFlags1Value() { + uint32_t Value = 0; + + if (OddSPReg) + Value |= (uint32_t)AFL_FLAGS1_ODDSPREG; + + return Value; + } + + uint32_t getFlags2Value() { return 0; } + + FpABIKind getFpABI() { return FpABI; } + void setFpABI(FpABIKind Value, bool IsABI32Bit) { + FpABI = Value; + Is32BitABI = IsABI32Bit; + } + StringRef getFpABIString(FpABIKind Value); template void setISALevelAndRevisionFromPredicates(const PredicateLibrary &P) { @@ 
-177,16 +206,18 @@ struct MipsABIFlagsSection { template void setFpAbiFromPredicates(const PredicateLibrary &P) { - FpABI = Val_GNU_MIPS_ABI_FP_ANY; + Is32BitABI = P.isABI_O32(); + + FpABI = FpABIKind::ANY; if (P.isABI_N32() || P.isABI_N64()) - FpABI = Val_GNU_MIPS_ABI_FP_DOUBLE; + FpABI = FpABIKind::S64; else if (P.isABI_O32()) { if (P.isFP64bit()) - FpABI = Val_GNU_MIPS_ABI_FP_64; + FpABI = FpABIKind::S64; else if (P.isABI_FPXX()) - FpABI = Val_GNU_MIPS_ABI_FP_XX; + FpABI = FpABIKind::XX; else - FpABI = Val_GNU_MIPS_ABI_FP_DOUBLE; + FpABI = FpABIKind::S32; } } diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 5ebcbc8cb0fe..fbe375b89641 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -59,6 +59,11 @@ void MipsTargetStreamer::emitDirectiveCpload(unsigned RegNo) {} void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { } +void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled, + bool IsO32ABI) { + if (!Enabled && !IsO32ABI) + report_fatal_error("+nooddspreg is only valid for O32"); +} MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) @@ -211,26 +216,33 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo, setCanHaveModuleDir(false); } -void MipsTargetAsmStreamer::emitDirectiveModuleFP(Val_GNU_MIPS_ABI Value, - bool Is32BitAbi) { - MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitAbi); +void MipsTargetAsmStreamer::emitDirectiveModuleFP( + MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) { + MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI); StringRef ModuleValue; OS << "\t.module\tfp="; - OS << ABIFlagsSection.getFpABIString(Value, Is32BitAbi) << "\n"; + OS << ABIFlagsSection.getFpABIString(Value) << "\n"; } -void MipsTargetAsmStreamer::emitDirectiveSetFp(Val_GNU_MIPS_ABI 
Value, - bool Is32BitAbi) { +void MipsTargetAsmStreamer::emitDirectiveSetFp( + MipsABIFlagsSection::FpABIKind Value) { StringRef ModuleValue; OS << "\t.set\tfp="; - OS << ABIFlagsSection.getFpABIString(Value, Is32BitAbi) << "\n"; + OS << ABIFlagsSection.getFpABIString(Value) << "\n"; } void MipsTargetAsmStreamer::emitMipsAbiFlags() { // No action required for text output. } +void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled, + bool IsO32ABI) { + MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI); + + OS << "\t.module\t" << (Enabled ? "" : "no") << "oddspreg\n"; +} + // This part is for ELF object output. MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) @@ -633,3 +645,10 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() { OS << ABIFlagsSection; } + +void MipsTargetELFStreamer::emitDirectiveModuleOddSPReg(bool Enabled, + bool IsO32ABI) { + MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI); + + ABIFlagsSection.OddSPReg = Enabled; +} diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index ea16331f71d8..2ea31d4aeb9b 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -73,6 +73,9 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64", "Enable n64 ABI">; def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", "Enable eabi ABI">; +def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false", + "Disable odd numbered single-precision " + "registers">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", "true", "Enable vector FPU instructions.">; def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 4d4fcd96a4fe..1fb75a290b98 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -704,9 +704,13 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { 
OutContext.getELFSection(".gcc_compiled_long64", ELF::SHT_PROGBITS, 0, SectionKind::getDataRel())); } + getTargetStreamer().updateABIInfo(*Subtarget); - getTargetStreamer().emitDirectiveModuleFP( - getTargetStreamer().getABIFlagsSection().FpABI, Subtarget->isABI_O32()); + getTargetStreamer().emitDirectiveModuleFP(); + + if (Subtarget->isABI_O32()) + getTargetStreamer().emitDirectiveModuleOddSPReg(Subtarget->useOddSPReg(), + Subtarget->isABI_O32()); } void MipsAsmPrinter::EmitJal(MCSymbol *Symbol) { diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 83d25ab469c3..5703d6b41b14 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -201,6 +201,11 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::GP_64); } + if (Subtarget.isABI_O32() && !Subtarget.useOddSPReg()) { + for (const auto &Reg : Mips::OddSPRegClass) + Reserved.set(Reg); + } + return Reserved; } diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index b5897af52cdc..6323da378850 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -340,6 +340,12 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; +// Used to reserve odd registers when given -mattr=+nooddspreg +def OddSP : RegisterClass<"Mips", [f32], 32, + (add (decimate (sequence "F%u", 1, 31), 2), + (decimate (sequence "F_HI%u", 1, 31), 2))>, + Unallocatable; + // FP control registers. 
def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>, Unallocatable; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index bc185cd01cbc..535ee2be3413 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -107,13 +107,14 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, Reloc::Model _RM, MipsTargetMachine *_TM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), - IsFP64bit(false), IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), - HasCnMips(false), IsLinux(true), HasMips3_32(false), HasMips3_32r2(false), - HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false), - InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), - InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), - AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), - RM(_RM), OverrideMode(NoOverride), TM(_TM), TargetTriple(TT), + IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false), + IsGP64bit(false), HasVFPU(false), HasCnMips(false), IsLinux(true), + HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false), + HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false), + InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), + HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), + HasMSA(false), RM(_RM), OverrideMode(NoOverride), TM(_TM), + TargetTriple(TT), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))), TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*TM)), FrameLowering(MipsFrameLowering::create(*TM, *this)), @@ -151,6 +152,11 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, "See -mattr=+fp64.", false); + if (!isABI_O32() && !useOddSPReg()) + report_fatal_error("-mattr=+nooddspreg is not currently permitted for a " + "the O32 ABI.", + false); + if (hasMips32r6()) { 
StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6"; diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 1d94bbaafb77..eb9d58a5aad6 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -65,6 +65,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // IsFP64bit - The target processor has 64-bit floating point registers. bool IsFP64bit; + /// Are odd single-precision registers permitted? + /// This corresponds to -modd-spreg and -mno-odd-spreg + bool UseOddSPReg; + // IsNan2008 - IEEE 754-2008 NaN encoding. bool IsNaN2008bit; @@ -203,6 +207,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo { bool isLittle() const { return IsLittle; } bool isFP64bit() const { return IsFP64bit; } + bool useOddSPReg() const { return UseOddSPReg; } bool isNaN2008() const { return IsNaN2008bit; } bool isNotFP64bit() const { return !IsFP64bit; } bool isGP64bit() const { return IsGP64bit; } diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 18434116f39a..99f7d4c92cfb 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -18,8 +18,6 @@ namespace llvm { struct MipsABIFlagsSection; -typedef MipsABIFlagsSection::Val_GNU_MIPS_ABI Val_GNU_MIPS_ABI; - class MipsTargetStreamer : public MCTargetStreamer { public: MipsTargetStreamer(MCStreamer &S); @@ -56,11 +54,23 @@ class MipsTargetStreamer : public MCTargetStreamer { virtual void emitDirectiveCpload(unsigned RegNo); virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg); - // ABI Flags - virtual void emitDirectiveModuleFP(Val_GNU_MIPS_ABI Value, bool Is32BitAbi) { - ABIFlagsSection.FpABI = Value; + + /// Emit a '.module fp=value' directive using the given values. 
+ /// Updates the .MIPS.abiflags section + virtual void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value, + bool Is32BitABI) { + ABIFlagsSection.setFpABI(Value, Is32BitABI); } - virtual void emitDirectiveSetFp(Val_GNU_MIPS_ABI Value, bool Is32BitAbi){}; + + /// Emit a '.module fp=value' directive using the current values of the + /// .MIPS.abiflags section. + void emitDirectiveModuleFP() { + emitDirectiveModuleFP(ABIFlagsSection.getFpABI(), + ABIFlagsSection.Is32BitABI); + } + + virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI); + virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){}; virtual void emitMipsAbiFlags(){}; void setCanHaveModuleDir(bool Can) { canHaveModuleDirective = Can; } bool getCanHaveModuleDir() { return canHaveModuleDirective; } @@ -122,8 +132,10 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { const MCSymbol &Sym, bool IsReg) override; // ABI Flags - void emitDirectiveModuleFP(Val_GNU_MIPS_ABI Value, bool Is32BitAbi) override; - void emitDirectiveSetFp(Val_GNU_MIPS_ABI Value, bool Is32BitAbi) override; + void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value, + bool Is32BitABI) override; + void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override; + void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override; void emitMipsAbiFlags() override; }; @@ -177,6 +189,7 @@ class MipsTargetELFStreamer : public MipsTargetStreamer { const MCSymbol &Sym, bool IsReg) override; // ABI Flags + void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override; void emitMipsAbiFlags() override; protected: diff --git a/test/CodeGen/Mips/no-odd-spreg.ll b/test/CodeGen/Mips/no-odd-spreg.ll new file mode 100644 index 000000000000..b42ed6aaa406 --- /dev/null +++ b/test/CodeGen/Mips/no-odd-spreg.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG +; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+nooddspreg < 
%s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG +; RUN: llc -march=mipsel -mcpu=mips32r6 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG +; RUN: llc -march=mipsel -mcpu=mips32r6 -mattr=fp64,+nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG + +; ODDSPREG: .module oddspreg +; NOODDSPREG: .module nooddspreg + +define float @two_floats(float %a) { +entry: + ; Clobber all except $f12 and $f13 + ; + ; The intention is that if odd single precision registers are permitted, the + ; allocator will choose $f12 and $f13 to avoid the spill/reload. + ; + ; On the other hand, if odd single precision registers are not permitted, it + ; will be forced to spill/reload either %a or %0. + + %0 = fadd float %a, 1.0 + call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"() + %1 = fadd float %a, %0 + ret float %1 +} + +; ALL-LABEL: two_floats: +; ODDSPREG: add.s $f13, $f12, ${{f[0-9]+}} +; ODDSPREG-NOT: swc1 +; ODDSPREG-NOT: lwc1 +; ODDSPREG: add.s $f0, $f12, $f13 + +; NOODDSPREG: add.s $[[T0:f[0-9]*[02468]]], $f12, ${{f[0-9]+}} +; NOODDSPREG: swc1 $[[T0]], +; NOODDSPREG: lwc1 $[[T1:f[0-9]*[02468]]], +; NOODDSPREG: add.s $f0, $f12, $[[T1]] + +define double @two_doubles(double %a) { +entry: + ; Clobber all except $f12 and $f13 + ; + ; -mno-odd-sp-reg doesn't need to affect double precision values so both cases + ; use $f12 and $f13. 
+ + %0 = fadd double %a, 1.0 + call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"() + %1 = fadd double %a, %0 + ret double %1 +} + +; ALL-LABEL: two_doubles: +; ALL: add.d $[[T0:f[0-9]+]], $f12, ${{f[0-9]+}} +; ALL: add.d $f0, $f12, $[[T0]] + + +; INVALID: -mattr=+nooddspreg is not currently permitted for a 32-bit FPU register file (FR=0 mode). diff --git a/test/MC/Mips/nooddspreg-cmdarg.s b/test/MC/Mips/nooddspreg-cmdarg.s new file mode 100644 index 000000000000..826db1236a91 --- /dev/null +++ b/test/MC/Mips/nooddspreg-cmdarg.s @@ -0,0 +1,43 @@ +# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64,+nooddspreg | \ +# RUN: FileCheck %s -check-prefix=CHECK-ASM +# +# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64,+nooddspreg -filetype=obj -o - | \ +# RUN: llvm-readobj -sections -section-data -section-relocations - | \ +# RUN: FileCheck %s -check-prefix=CHECK-OBJ + +# RUN: not llvm-mc %s -arch=mips -mcpu=mips64 -mattr=-n64,+n32,+nooddspreg 2> %t0 +# RUN: FileCheck %s -check-prefix=INVALID < %t0 +# +# RUN: not llvm-mc %s -arch=mips -mcpu=mips64 -mattr=+nooddspreg 2> %t0 +# RUN: FileCheck %s -check-prefix=INVALID < %t0 +# +# CHECK-ASM-NOT: .module nooddspreg + +# Checking if the Mips.abiflags were correctly emitted. +# CHECK-OBJ: Section { +# CHECK-OBJ: Index: 5 +# CHECK-OBJ: Name: .MIPS.abiflags (12) +# CHECK-OBJ: Type: (0x7000002A) +# CHECK-OBJ: Flags [ (0x2) +# CHECK-OBJ: SHF_ALLOC (0x2) +# CHECK-OBJ: ] +# CHECK-OBJ: Address: 0x0 +# CHECK-OBJ: Offset: 0x50 +# CHECK-OBJ: Size: 24 +# CHECK-OBJ: Link: 0 +# CHECK-OBJ: Info: 0 +# CHECK-OBJ: AddressAlignment: 8 +# CHECK-OBJ: EntrySize: 0 +# CHECK-OBJ: Relocations [ +# CHECK-OBJ: ] +# CHECK-OBJ: SectionData ( +# CHECK-OBJ: 0000: 00002001 01020007 00000000 00000000 |.. 
.............| +# CHECK-OBJ: 0010: 00000000 00000000 |........| +# CHECK-OBJ: ) +# CHECK-OBJ: } + +# INVALID: ERROR: -mno-odd-spreg requires the O32 ABI + +# FIXME: Test should include gnu_attributes directive when implemented. +# An explicit .gnu_attribute must be checked against the effective +# command line options and any inconsistencies reported via a warning. diff --git a/test/MC/Mips/nooddspreg-error.s b/test/MC/Mips/nooddspreg-error.s new file mode 100644 index 000000000000..b4aabbef7280 --- /dev/null +++ b/test/MC/Mips/nooddspreg-error.s @@ -0,0 +1,14 @@ +# RUN: not llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 2> %t0 | \ +# RUN: FileCheck %s -check-prefix=CHECK-ASM +# RUN: FileCheck %s -check-prefix=CHECK-ERROR < %t0 +# + .module nooddspreg +# CHECK-ASM: .module nooddspreg + + add.s $f1, $f2, $f5 +# CHECK-ERROR: :[[@LINE-1]]:15: error: -mno-odd-spreg prohibits the use of odd FPU registers +# CHECK-ERROR: :[[@LINE-2]]:25: error: -mno-odd-spreg prohibits the use of odd FPU registers + +# FIXME: Test should include gnu_attributes directive when implemented. +# An explicit .gnu_attribute must be checked against the effective +# command line options and any inconsistencies reported via a warning. 
diff --git a/test/MC/Mips/nooddspreg.s b/test/MC/Mips/nooddspreg.s new file mode 100644 index 000000000000..5a283f54f7ef --- /dev/null +++ b/test/MC/Mips/nooddspreg.s @@ -0,0 +1,45 @@ +# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 | \ +# RUN: FileCheck %s -check-prefix=CHECK-ASM +# +# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 -filetype=obj -o - | \ +# RUN: llvm-readobj -sections -section-data -section-relocations - | \ +# RUN: FileCheck %s -check-prefix=CHECK-OBJ + +# RUN: not llvm-mc %s -arch=mips -mcpu=mips64 -mattr=-n64,n32 2> %t1 +# RUN: FileCheck %s -check-prefix=INVALID < %t1 +# +# RUN: not llvm-mc %s -arch=mips -mcpu=mips64 2> %t2 +# RUN: FileCheck %s -check-prefix=INVALID < %t2 +# +# CHECK-ASM: .module nooddspreg + +# Checking if the Mips.abiflags were correctly emitted. +# CHECK-OBJ: Section { +# CHECK-OBJ: Index: 5 +# CHECK-OBJ: Name: .MIPS.abiflags (12) +# CHECK-OBJ: Type: (0x7000002A) +# CHECK-OBJ: Flags [ (0x2) +# CHECK-OBJ: SHF_ALLOC (0x2) +# CHECK-OBJ: ] +# CHECK-OBJ: Address: 0x0 +# CHECK-OBJ: Offset: 0x50 +# CHECK-OBJ: Size: 24 +# CHECK-OBJ: Link: 0 +# CHECK-OBJ: Info: 0 +# CHECK-OBJ: AddressAlignment: 8 +# CHECK-OBJ: EntrySize: 0 +# CHECK-OBJ: Relocations [ +# CHECK-OBJ: ] +# CHECK-OBJ: SectionData ( +# CHECK-OBJ: 0000: 00002001 01020007 00000000 00000000 |.. .............| +# CHECK-OBJ: 0010: 00000000 00000000 |........| +# CHECK-OBJ: ) +# CHECK-OBJ: } + +# INVALID: '.module nooddspreg' requires the O32 ABI + + .module nooddspreg + +# FIXME: Test should include gnu_attributes directive when implemented. +# An explicit .gnu_attribute must be checked against the effective +# command line options and any inconsistencies reported via a warning. 
diff --git a/test/MC/Mips/oddspreg.s b/test/MC/Mips/oddspreg.s new file mode 100644 index 000000000000..f5aa9c00db0e --- /dev/null +++ b/test/MC/Mips/oddspreg.s @@ -0,0 +1,56 @@ +# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 | \ +# RUN: FileCheck %s -check-prefix=CHECK-ASM +# +# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64 -filetype=obj -o - | \ +# RUN: llvm-readobj -sections -section-data -section-relocations - | \ +# RUN: FileCheck %s -check-prefix=CHECK-OBJ-ALL -check-prefix=CHECK-OBJ-O32 +# +# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -mattr=-n64,+n32 | \ +# RUN: FileCheck %s -check-prefix=CHECK-ASM +# +# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -mattr=-n64,+n32 -filetype=obj -o - | \ +# RUN: llvm-readobj -sections -section-data -section-relocations - | \ +# RUN: FileCheck %s -check-prefix=CHECK-OBJ-ALL -check-prefix=CHECK-OBJ-N32 + +# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 | \ +# RUN: FileCheck %s -check-prefix=CHECK-ASM +# +# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -filetype=obj -o - | \ +# RUN: llvm-readobj -sections -section-data -section-relocations - | \ +# RUN: FileCheck %s -check-prefix=CHECK-OBJ-ALL -check-prefix=CHECK-OBJ-N64 + +# CHECK-ASM: .module oddspreg + +# Checking if the Mips.abiflags were correctly emitted. +# CHECK-OBJ-ALL: Section { +# CHECK-OBJ-ALL: Index: 5 +# CHECK-OBJ-ALL: Name: .MIPS.abiflags ({{[0-9]+}}) +# CHECK-OBJ-ALL: Type: (0x7000002A) +# CHECK-OBJ-ALL: Flags [ (0x2) +# CHECK-OBJ-ALL: SHF_ALLOC (0x2) +# CHECK-OBJ-ALL: ] +# CHECK-OBJ-ALL: Address: 0x0 +# CHECK-OBJ-ALL: Offset: 0x{{[0-9A-F]+}} +# CHECK-OBJ-ALL: Size: 24 +# CHECK-OBJ-ALL: Link: 0 +# CHECK-OBJ-ALL: Info: 0 +# CHECK-OBJ-ALL: AddressAlignment: 8 +# CHECK-OBJ-ALL: EntrySize: 0 +# CHECK-OBJ-ALL: Relocations [ +# CHECK-OBJ-ALL: ] +# CHECK-OBJ-ALL: SectionData ( +# CHECK-OBJ-O32: 0000: 00002001 01020006 00000000 00000000 |.. 
.............| +# CHECK-OBJ-O32: 0010: 00000001 00000000 |........| +# CHECK-OBJ-N32: 0000: 00004001 02020001 00000000 00000000 |..@.............| +# CHECK-OBJ-N32: 0010: 00000001 00000000 |........| +# CHECK-OBJ-N64: 0000: 00004001 02020001 00000000 00000000 |..@.............| +# CHECK-OBJ-N64: 0010: 00000001 00000000 |........| +# CHECK-OBJ-ALL: ) +# CHECK-OBJ-ALL: } + + .module oddspreg + add.s $f3, $f1, $f5 + +# FIXME: Test should include gnu_attributes directive when implemented. +# An explicit .gnu_attribute must be checked against the effective +# command line options and any inconsistencies reported via a warning. From 6b0ac2aa02d66883884818282a2318ee5d2c0b13 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 10 Jul 2014 14:18:46 +0000 Subject: [PATCH 0746/1155] AArch64: correctly fast-isel i8 & i16 multiplies We were asking for a register for type i8 or i16 which caused an assert. rdar://problem/17620015 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212718 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 1 + test/CodeGen/AArch64/fast-isel-mul.ll | 40 ++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 test/CodeGen/AArch64/fast-isel-mul.ll diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index c4e0ff0c38c9..2164d77b7900 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1907,6 +1907,7 @@ bool AArch64FastISel::SelectMul(const Instruction *I) { case MVT::i32: ZReg = AArch64::WZR; Opc = AArch64::MADDWrrr; + SrcVT = MVT::i32; break; case MVT::i64: ZReg = AArch64::XZR; diff --git a/test/CodeGen/AArch64/fast-isel-mul.ll b/test/CodeGen/AArch64/fast-isel-mul.ll new file mode 100644 index 000000000000..d02c67f52f8d --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-mul.ll @@ -0,0 +1,40 @@ +; RUN: llc -fast-isel -fast-isel-abort -mtriple=aarch64 -o - %s | FileCheck %s + +@var8 = global i8 0 +@var16 
= global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_mul8(i8 %lhs, i8 %rhs) { +; CHECK-LABEL: test_mul8: +; CHECK: mul w0, w0, w1 +; %lhs = load i8* @var8 +; %rhs = load i8* @var8 + %prod = mul i8 %lhs, %rhs + store i8 %prod, i8* @var8 + ret void +} + +define void @test_mul16(i16 %lhs, i16 %rhs) { +; CHECK-LABEL: test_mul16: +; CHECK: mul w0, w0, w1 + %prod = mul i16 %lhs, %rhs + store i16 %prod, i16* @var16 + ret void +} + +define void @test_mul32(i32 %lhs, i32 %rhs) { +; CHECK-LABEL: test_mul32: +; CHECK: mul w0, w0, w1 + %prod = mul i32 %lhs, %rhs + store i32 %prod, i32* @var32 + ret void +} + +define void @test_mul64(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: test_mul64: +; CHECK: mul x0, x0, x1 + %prod = mul i64 %lhs, %rhs + store i64 %prod, i64* @var64 + ret void +} From 73118c41286084de76c9375e7ac40eb46cf40747 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 10 Jul 2014 14:41:31 +0000 Subject: [PATCH 0747/1155] Feeding isSafeToSpeculativelyExecute its DataLayout pointer isSafeToSpeculativelyExecute can optionally take a DataLayout pointer. In the past, this was mainly used to make better decisions regarding divisions known not to trap, and so was not all that important for users concerned with "cheap" instructions. However, now it also helps look through bitcasts for dereferencable loads, and will also be important if/when we add a dereferencable pointer attribute. This is some initial work to feed a DataLayout pointer through to callers of isSafeToSpeculativelyExecute, generally where one was already available. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212720 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Utils/Local.h | 2 +- include/llvm/Transforms/Utils/LoopUtils.h | 4 +- .../InstCombine/InstructionCombining.cpp | 2 +- lib/Transforms/Scalar/LoopRerollPass.cpp | 6 ++- lib/Transforms/Utils/LoopSimplify.cpp | 17 ++++-- lib/Transforms/Utils/LoopUnroll.cpp | 5 +- lib/Transforms/Utils/SimplifyCFG.cpp | 52 ++++++++++--------- 7 files changed, 52 insertions(+), 36 deletions(-) diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 6f642692c7d4..c0c690664a9c 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -148,7 +148,7 @@ bool FlattenCFG(BasicBlock *BB, AliasAnalysis *AA = nullptr); /// and if a predecessor branches to us and one of our successors, fold the /// setcc into the predecessor and use logical operations to pick the right /// destination. -bool FoldBranchToCommonDest(BranchInst *BI); +bool FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL = nullptr); /// DemoteRegToStack - This function takes a virtual register computed by an /// Instruction and replaces it with a slot in the stack frame, allocated via diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index ee26d834c2a0..7e3a74aae93c 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -17,6 +17,7 @@ namespace llvm { class AliasAnalysis; class BasicBlock; +class DataLayout; class DominatorTree; class Loop; class LoopInfo; @@ -32,7 +33,8 @@ BasicBlock *InsertPreheaderForLoop(Loop *L, Pass *P); /// will optionally update \c AliasAnalysis and \c ScalarEvolution analyses if /// passed into it. 
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, - AliasAnalysis *AA = nullptr, ScalarEvolution *SE = nullptr); + AliasAnalysis *AA = nullptr, ScalarEvolution *SE = nullptr, + const DataLayout *DL = nullptr); /// \brief Put loop into LCSSA form. /// diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 487699c41e95..46e3bfc7e44e 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1199,7 +1199,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { // It may not be safe to reorder shuffles and things like div, urem, etc. // because we may trap when executing those ops on unknown vector elements. // See PR20059. - if (!isSafeToSpeculativelyExecute(&Inst)) return nullptr; + if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr; unsigned VWidth = cast(Inst.getType())->getNumElements(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 8b5e036dbec2..b6fbb16166dd 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -924,8 +924,10 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, // them, and this matching fails. As an exception, we allow the alias // set tracker to handle regular (simple) load/store dependencies. if (FutureSideEffects && - ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) || - (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) { + ((!isSimpleLoadStore(J1) && + !isSafeToSpeculativelyExecute(J1, DL)) || + (!isSimpleLoadStore(J2) && + !isSafeToSpeculativelyExecute(J2, DL)))) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. 
" << *J2 << " (side effects prevent reordering)\n"); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index f7787dafd5bf..ef422914b6b2 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -50,6 +50,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -473,7 +474,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, /// explicit if they accepted the analysis directly and then updated it. static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, Pass *PP) { + ScalarEvolution *SE, Pass *PP, + const DataLayout *DL) { bool Changed = false; ReprocessLoop: @@ -672,7 +674,7 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI)) continue; + if (!FoldBranchToCommonDest(BI, DL)) continue; // Success. The block is now dead, so remove it from the loop, // update the dominator tree and delete it. @@ -709,7 +711,8 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, } bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, - AliasAnalysis *AA, ScalarEvolution *SE) { + AliasAnalysis *AA, ScalarEvolution *SE, + const DataLayout *DL) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. 
@@ -726,7 +729,8 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, } while (!Worklist.empty()) - Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, SE, PP); + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, + SE, PP, DL); return Changed; } @@ -744,6 +748,7 @@ namespace { DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; + const DataLayout *DL; bool runOnFunction(Function &F) override; @@ -787,10 +792,12 @@ bool LoopSimplify::runOnFunction(Function &F) { LI = &getAnalysis(); DT = &getAnalysis().getDomTree(); SE = getAnalysisIfAvailable(); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : nullptr; // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, this, AA, SE); + Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL); return Changed; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 20150aa5596e..c86b82cf4fba 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" @@ -489,8 +490,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { + DataLayoutPass *DLP = PP->getAnalysisIfAvailable(); + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; ScalarEvolution *SE = PP->getAnalysisIfAvailable(); - simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL); // LCSSA must be performed on the outermost affected loop. 
The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 4fd0b18ff839..960b198f36bd 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -201,8 +201,8 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, /// ComputeSpeculationCost - Compute an abstract "cost" of speculating the /// given instruction, which is assumed to be safe to speculate. 1 means /// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. -static unsigned ComputeSpeculationCost(const User *I) { - assert(isSafeToSpeculativelyExecute(I) && +static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { + assert(isSafeToSpeculativelyExecute(I, DL) && "Instruction is not safe to speculatively execute!"); switch (Operator::getOpcode(I)) { default: @@ -257,7 +257,8 @@ static unsigned ComputeSpeculationCost(const User *I) { /// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSet *AggressiveInsts, - unsigned &CostRemaining) { + unsigned &CostRemaining, + const DataLayout *DL) { Instruction *I = dyn_cast(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -290,10 +291,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. 
- if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I, DL)) return false; - unsigned Cost = ComputeSpeculationCost(I); + unsigned Cost = ComputeSpeculationCost(I, DL); if (Cost > CostRemaining) return false; @@ -303,7 +304,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -997,7 +998,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI) { +static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As @@ -1071,9 +1072,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { if (BB1V == BB2V) continue; - if (isa(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) + if (isa(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL)) return Changed; - if (isa(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) + if (isa(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL)) return Changed; } } @@ -1390,7 +1391,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// \endcode /// /// \returns true if the conditional block is removed. 
-static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const DataLayout *DL) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa(BrCond)) @@ -1433,13 +1435,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I) && + if (!isSafeToSpeculativelyExecute(I, DL) && !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold) return false; // Store the store speculation candidate. @@ -1490,11 +1492,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0; + unsigned ThenCost = ThenCE ? 
ComputeSpeculationCost(ThenCE, DL) : 0; if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) return false; @@ -1741,9 +1743,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, - MaxCostVal0) || + MaxCostVal0, DL) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, - MaxCostVal1)) + MaxCostVal1, DL)) return false; } @@ -1961,7 +1963,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) { BasicBlock *BB = BI->getParent(); Instruction *Cond = nullptr; @@ -2013,7 +2015,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *BonusInst = nullptr; if (&*FrontIt != Cond && FrontIt->hasOneUse() && FrontIt->user_back() == Cond && - isSafeToSpeculativelyExecute(FrontIt)) { + isSafeToSpeculativelyExecute(FrontIt, DL)) { BonusInst = &*FrontIt; ++FrontIt; @@ -4016,7 +4018,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI)) + if (FoldBranchToCommonDest(BI, DL)) return SimplifyCFG(BB, TTI, DL) | true; return false; } @@ -4060,7 +4062,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. 
- if (FoldBranchToCommonDest(BI)) + if (FoldBranchToCommonDest(BI, DL)) return SimplifyCFG(BB, TTI, DL) | true; // We have a conditional branch to two blocks that are only reachable @@ -4069,7 +4071,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI)) + if (HoistThenElseCodeToIf(BI, DL)) return SimplifyCFG(BB, TTI, DL) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally @@ -4077,7 +4079,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL)) return SimplifyCFG(BB, TTI, DL) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { @@ -4086,7 +4088,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL)) return SimplifyCFG(BB, TTI, DL) | true; } From a9af0558b20482d012814e2e32ee6d26c0729731 Mon Sep 17 00:00:00 2001 From: "Arnaud A. 
de Grandmaison" Date: Thu, 10 Jul 2014 15:12:26 +0000 Subject: [PATCH 0748/1155] [AArch64] Add logical alias instructions to MC AsmParser This patch teaches the AsmParser to accept some logical+immediate instructions and convert them as shown: bic Rd, Rn, #imm -> and Rd, Rn, #~imm bics Rd, Rn, #imm -> ands Rd, Rn, #~imm orn Rd, Rn, #imm -> orr Rd, Rn, #~imm eon Rd, Rn, #imm -> eor Rd, Rn, #~imm Those instructions are an alternate syntax available to assembly coders, and are needed in order to support code already compiling with some other assemblers. For example, the bic construct is used by the linux kernel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212722 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrFormats.td | 49 +++++++++++++++---- lib/Target/AArch64/AArch64InstrInfo.td | 8 +-- .../AArch64/AsmParser/AArch64AsmParser.cpp | 33 +++++++++++++ test/MC/AArch64/alias-logicalimm.s | 41 ++++++++++++++++ test/MC/AArch64/basic-a64-diagnostics.s | 10 ++-- 5 files changed, 124 insertions(+), 17 deletions(-) create mode 100644 test/MC/AArch64/alias-logicalimm.s diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 446149b4fb0d..5007172f1539 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -448,13 +448,19 @@ def logical_imm64_XFORM : SDNodeXFormgetTargetConstant(enc, MVT::i32); }]>; -def LogicalImm32Operand : AsmOperandClass { - let Name = "LogicalImm32"; - let DiagnosticType = "LogicalSecondSource"; -} -def LogicalImm64Operand : AsmOperandClass { - let Name = "LogicalImm64"; - let DiagnosticType = "LogicalSecondSource"; +let DiagnosticType = "LogicalSecondSource" in { + def LogicalImm32Operand : AsmOperandClass { + let Name = "LogicalImm32"; + } + def LogicalImm64Operand : AsmOperandClass { + let Name = "LogicalImm64"; + } + def LogicalImm32NotOperand : AsmOperandClass { + let Name = "LogicalImm32Not"; + } + def 
LogicalImm64NotOperand : AsmOperandClass { + let Name = "LogicalImm64Not"; + } } def logical_imm32 : Operand, PatLeaf<(imm), [{ return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32); @@ -468,6 +474,12 @@ def logical_imm64 : Operand, PatLeaf<(imm), [{ let PrintMethod = "printLogicalImm64"; let ParserMatchClass = LogicalImm64Operand; } +def logical_imm32_not : Operand { + let ParserMatchClass = LogicalImm32NotOperand; +} +def logical_imm64_not : Operand { + let ParserMatchClass = LogicalImm64NotOperand; +} // imm0_65535 predicate - True if the immediate is in the range [0,65535]. def Imm0_65535Operand : AsmImmRange<0, 65535>; @@ -1935,22 +1947,32 @@ class LogicalRegAlias : InstAlias; -let AddedComplexity = 6 in -multiclass LogicalImm opc, string mnemonic, SDNode OpNode> { +multiclass LogicalImm opc, string mnemonic, SDNode OpNode, + string Alias> { + let AddedComplexity = 6 in def Wri : BaseLogicalImm { let Inst{31} = 0; let Inst{22} = 0; // 64-bit version has an additional bit of immediate. 
} + let AddedComplexity = 6 in def Xri : BaseLogicalImm { let Inst{31} = 1; } + + def : InstAlias(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstAlias(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; } -multiclass LogicalImmS opc, string mnemonic, SDNode OpNode> { +multiclass LogicalImmS opc, string mnemonic, SDNode OpNode, + string Alias> { let isCompare = 1, Defs = [NZCV] in { def Wri : BaseLogicalImm { @@ -1962,6 +1984,13 @@ multiclass LogicalImmS opc, string mnemonic, SDNode OpNode> { let Inst{31} = 1; } } // end Defs = [NZCV] + + def : InstAlias(NAME # "Wri") GPR32:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstAlias(NAME # "Xri") GPR64:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; } class BaseLogicalRegPseudo diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index d7c6d78831f7..1211fba60c25 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -671,10 +671,10 @@ def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; //===----------------------------------------------------------------------===// // (immediate) -defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>; -defm AND : LogicalImm<0b00, "and", and>; -defm EOR : LogicalImm<0b10, "eor", xor>; -defm ORR : LogicalImm<0b01, "orr", or>; +defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; +defm AND : LogicalImm<0b00, "and", and, "bic">; +defm EOR : LogicalImm<0b10, "eor", xor, "eon">; +defm ORR : LogicalImm<0b01, "orr", or, "orn">; // FIXME: these aliases *are* canonical sometimes (when movz can't be // used). 
Actually, it seems to be working right now, but putting logical_immXX diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 69f51daa99be..c42d11e0f3d4 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -633,6 +633,23 @@ class AArch64Operand : public MCParsedAsmOperand { return false; return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64); } + bool isLogicalImm32Not() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = ~MCE->getValue() & 0xFFFFFFFF; + return AArch64_AM::isLogicalImmediate(Val, 32); + } + bool isLogicalImm64Not() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64); + } bool isShiftedImm() const { return Kind == k_ShiftedImm; } bool isAddSubImm() const { if (!isShiftedImm() && !isImm()) @@ -1377,6 +1394,22 @@ class AArch64Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateImm(encoding)); } + void addLogicalImm32NotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = cast(getImm()); + int64_t Val = ~MCE->getValue() & 0xFFFFFFFF; + uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, 32); + Inst.addOperand(MCOperand::CreateImm(encoding)); + } + + void addLogicalImm64NotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = cast(getImm()); + uint64_t encoding = + AArch64_AM::encodeLogicalImmediate(~MCE->getValue(), 64); + Inst.addOperand(MCOperand::CreateImm(encoding)); + } + void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast(getImm()); diff --git 
a/test/MC/AArch64/alias-logicalimm.s b/test/MC/AArch64/alias-logicalimm.s new file mode 100644 index 000000000000..28ec40beac4d --- /dev/null +++ b/test/MC/AArch64/alias-logicalimm.s @@ -0,0 +1,41 @@ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu < %s | FileCheck %s + +// CHECK: and x0, x1, #0xfffffffffffffffd +// CHECK: and x0, x1, #0xfffffffffffffffd + and x0, x1, #~2 + bic x0, x1, #2 + +// CHECK: and w0, w1, #0xfffffffd +// CHECK: and w0, w1, #0xfffffffd + and w0, w1, #~2 + bic w0, w1, #2 + +// CHECK: ands x0, x1, #0xfffffffffffffffd +// CHECK: ands x0, x1, #0xfffffffffffffffd + ands x0, x1, #~2 + bics x0, x1, #2 + +// CHECK: ands w0, w1, #0xfffffffd +// CHECK: ands w0, w1, #0xfffffffd + ands w0, w1, #~2 + bics w0, w1, #2 + +// CHECK: orr x0, x1, #0xfffffffffffffffd +// CHECK: orr x0, x1, #0xfffffffffffffffd + orr x0, x1, #~2 + orn x0, x1, #2 + +// CHECK: orr w2, w1, #0xfffffffc +// CHECK: orr w2, w1, #0xfffffffc + orr w2, w1, #~3 + orn w2, w1, #3 + +// CHECK: eor x0, x1, #0xfffffffffffffffd +// CHECK: eor x0, x1, #0xfffffffffffffffd + eor x0, x1, #~2 + eon x0, x1, #2 + +// CHECK: eor w2, w1, #0xfffffffc +// CHECK: eor w2, w1, #0xfffffffc + eor w2, w1, #~3 + eon w2, w1, #3 diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s index 2726d00ab9c0..5293131711b6 100644 --- a/test/MC/AArch64/basic-a64-diagnostics.s +++ b/test/MC/AArch64/basic-a64-diagnostics.s @@ -2985,13 +2985,17 @@ orn wsp, w3, w5 bics x20, sp, x9, lsr #0 orn x2, x6, sp, lsl #3 -// CHECK-ERROR: error: invalid operand for instruction +// FIXME: the diagnostic we get for 'orn wsp, w3, w5' is from the orn alias, +// which is a better match than the genuine ORNWri, whereas it would be better +// to get the ORNWri diagnostic when the alias did not match, i.e. the +// alias' diagnostics should have a lower priority. 
+// CHECK-ERROR: error: expected compatible register or logical immediate // CHECK-ERROR-NEXT: orn wsp, w3, w5 -// CHECK-ERROR-NEXT: ^ +// CHECK-ERROR-NEXT: ^ // CHECK-ERROR-NEXT: error: invalid operand for instruction // CHECK-ERROR-NEXT: bics x20, sp, x9, lsr #0 // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-NEXT: error: invalid operand for instruction +// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate // CHECK-ERROR-NEXT: orn x2, x6, sp, lsl #3 // CHECK-ERROR-NEXT: ^ From 0ef47114d31d10b3e074f622c1c1c0dd4cf2c9aa Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Thu, 10 Jul 2014 15:36:12 +0000 Subject: [PATCH 0749/1155] [mips] Added FPXX modeless calling convention. Differential Revision: http://reviews.llvm.org/D4293 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212726 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips.td | 2 + lib/Target/Mips/MipsCallingConv.td | 5 ++ lib/Target/Mips/MipsRegisterInfo.cpp | 6 ++ lib/Target/Mips/MipsSubtarget.cpp | 2 +- lib/Target/Mips/MipsSubtarget.h | 4 ++ test/CodeGen/Mips/cconv/callee-saved-fpxx.ll | 58 +++++++++++++++++++ test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll | 24 ++++++++ 7 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/Mips/cconv/callee-saved-fpxx.ll create mode 100644 test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 2ea31d4aeb9b..dd3bc9b08fc8 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -61,6 +61,8 @@ def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true", "General Purpose Registers are 64-bit wide.">; def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true", "Support 64-bit FP registers.">; +def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true", + "Support for FPXX.">; def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true", "IEEE 754-2008 NaN encoding.">; def FeatureSingleFloat : 
SubtargetFeature<"single-float", "IsSingleFloat", diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index c83d880cdb2f..007213c848f8 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -239,6 +239,11 @@ def RetCC_Mips : CallingConv<[ def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP, (sequence "S%u", 7, 0))>; +def CSR_O32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP, + (sequence "S%u", 7, 0))> { + let OtherPreserved = (add (decimate (sequence "F%u", 30, 20), 2)); +} + def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP, (sequence "S%u", 7, 0))>; diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 5703d6b41b14..084449bba59c 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -93,6 +93,9 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isFP64bit()) return CSR_O32_FP64_SaveList; + if (Subtarget.isFPXX()) + return CSR_O32_FPXX_SaveList; + return CSR_O32_SaveList; } @@ -110,6 +113,9 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { if (Subtarget.isFP64bit()) return CSR_O32_FP64_RegMask; + if (Subtarget.isFPXX()) + return CSR_O32_FPXX_RegMask; + return CSR_O32_RegMask; } diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 535ee2be3413..693daa3fde2e 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -107,7 +107,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, Reloc::Model _RM, MipsTargetMachine *_TM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), - IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false), + IsFPXX(false), IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), 
HasCnMips(false), IsLinux(true), HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index eb9d58a5aad6..a3dcf03c63a9 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -62,6 +62,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // floating point registers instead of only using even ones. bool IsSingleFloat; + // IsFPXX - MIPS O32 modeless ABI. + bool IsFPXX; + // IsFP64bit - The target processor has 64-bit floating point registers. bool IsFP64bit; @@ -206,6 +209,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo { bool hasCnMips() const { return HasCnMips; } bool isLittle() const { return IsLittle; } + bool isFPXX() const { return IsFPXX; } bool isFP64bit() const { return IsFP64bit; } bool useOddSPReg() const { return UseOddSPReg; } bool isNaN2008() const { return IsNaN2008bit; } diff --git a/test/CodeGen/Mips/cconv/callee-saved-fpxx.ll b/test/CodeGen/Mips/cconv/callee-saved-fpxx.ll new file mode 100644 index 000000000000..4b28b9962075 --- /dev/null +++ b/test/CodeGen/Mips/cconv/callee-saved-fpxx.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s +; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s +; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV %s +; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV %s + +; RUN-TODO: llc -march=mips64 -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s +; RUN-TODO: llc -march=mips64el -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX %s +; RUN-TODO: llc -march=mips64 -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL 
--check-prefix=O32-FPXX-INV --check-prefix=O32-FPXX-INV %s +; RUN-TODO: llc -march=mips64el -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=ALL --check-prefix=O32-FPXX-INV --check-prefix=O32-FPXX-INV %s + +define void @fpu_clobber() nounwind { +entry: + call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f13},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"() + ret void +} + +; O32-FPXX-LABEL: fpu_clobber: +; O32-FPXX-INV-NOT: sdc1 $f0, +; O32-FPXX-INV-NOT: sdc1 $f1, +; O32-FPXX-INV-NOT: sdc1 $f2, +; O32-FPXX-INV-NOT: sdc1 $f3, +; O32-FPXX-INV-NOT: sdc1 $f4, +; O32-FPXX-INV-NOT: sdc1 $f5, +; O32-FPXX-INV-NOT: sdc1 $f6, +; O32-FPXX-INV-NOT: sdc1 $f7, +; O32-FPXX-INV-NOT: sdc1 $f8, +; O32-FPXX-INV-NOT: sdc1 $f9, +; O32-FPXX-INV-NOT: sdc1 $f10, +; O32-FPXX-INV-NOT: sdc1 $f11, +; O32-FPXX-INV-NOT: sdc1 $f12, +; O32-FPXX-INV-NOT: sdc1 $f13, +; O32-FPXX-INV-NOT: sdc1 $f14, +; O32-FPXX-INV-NOT: sdc1 $f15, +; O32-FPXX-INV-NOT: sdc1 $f16, +; O32-FPXX-INV-NOT: sdc1 $f17, +; O32-FPXX-INV-NOT: sdc1 $f18, +; O32-FPXX-INV-NOT: sdc1 $f19, +; O32-FPXX-INV-NOT: sdc1 $f21, +; O32-FPXX-INV-NOT: sdc1 $f23, +; O32-FPXX-INV-NOT: sdc1 $f25, +; O32-FPXX-INV-NOT: sdc1 $f27, +; O32-FPXX-INV-NOT: sdc1 $f29, +; O32-FPXX-INV-NOT: sdc1 $f31, + +; O32-FPXX: addiu $sp, $sp, -48 +; O32-FPXX-DAG: sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp) +; O32-FPXX-DAG: sdc1 [[F22:\$f22]], [[OFF22:[0-9]+]]($sp) +; O32-FPXX-DAG: sdc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp) +; O32-FPXX-DAG: sdc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp) +; O32-FPXX-DAG: sdc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp) +; O32-FPXX-DAG: sdc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp) +; O32-FPXX-DAG: ldc1 [[F20]], [[OFF20]]($sp) +; O32-FPXX-DAG: ldc1 [[F22]], [[OFF22]]($sp) +; O32-FPXX-DAG: ldc1 [[F24]], [[OFF24]]($sp) +; O32-FPXX-DAG: ldc1 [[F26]], [[OFF26]]($sp) +; O32-FPXX-DAG: ldc1 
[[F28]], [[OFF28]]($sp) +; O32-FPXX-DAG: ldc1 [[F30]], [[OFF30]]($sp) +; O32-FPXX: addiu $sp, $sp, 48 diff --git a/test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll b/test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll new file mode 100644 index 000000000000..489879e98ad3 --- /dev/null +++ b/test/CodeGen/Mips/cconv/callee-saved-fpxx1.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=mips -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=O32-FP64-INV %s +; RUN: llc -march=mipsel -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=O32-FP64-INV %s + +; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s +; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s + +; RUN-TODO: llc -march=mips64 -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s +; RUN-TODO: llc -march=mips64el -mattr=+o32,+fpxx < %s | FileCheck --check-prefix=O32-FPXX %s + +define void @fpu_clobber() nounwind { +entry: + call void asm "# Clobber", "~{$f21}"() + ret void +} + +; O32-FPXX-LABEL: fpu_clobber: + +; O32-FPXX: addiu $sp, $sp, -8 + +; O32-FP64-INV-NOT: sdc1 $f20, +; O32-FPXX-DAG: sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp) +; O32-FPXX-DAG: ldc1 [[F20]], [[OFF20]]($sp) + +; O32-FPXX: addiu $sp, $sp, 8 From 9478f0a6ffa078c96f3f306f1a421bb41c110304 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 10 Jul 2014 16:04:04 +0000 Subject: [PATCH 0750/1155] Mips: Silence a -Wcovered-switch-default Remove a default label which covered no enumerators, replace it with a llvm_unreachable. No functionality changed. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212729 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp index 3202b5e60d44..52d5dd3ea5c3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp @@ -23,9 +23,9 @@ uint8_t MipsABIFlagsSection::getFpABIValue() { if (Is32BitABI) return OddSPReg ? Val_GNU_MIPS_ABI_FP_64 : Val_GNU_MIPS_ABI_FP_64A; return Val_GNU_MIPS_ABI_FP_DOUBLE; - default: - return 0; } + + llvm_unreachable("unexpected fp abi value"); } StringRef MipsABIFlagsSection::getFpABIString(FpABIKind Value) { From b1beb01d822750b39291c8d059a01de5d4c6f422 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 10 Jul 2014 16:07:11 +0000 Subject: [PATCH 0751/1155] Feeding isSafeToSpeculativelyExecute its DataLayout pointer (in Sink) This is the one remaining place I see where passing isSafeToSpeculativelyExecute a DataLayout pointer might matter (at least for loads) -- I think I got the others in r212720. Most of the other remaining callers of isSafeToSpeculativelyExecute only use it for call sites (or otherwise exclude loads). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212730 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/Sink.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 482c33aa6e0b..7348c45c5d37 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" @@ -34,6 +35,7 @@ namespace { DominatorTree *DT; LoopInfo *LI; AliasAnalysis *AA; + const DataLayout *DL; public: static char ID; // Pass identification @@ -98,6 +100,8 @@ bool Sinking::runOnFunction(Function &F) { DT = &getAnalysis().getDomTree(); LI = &getAnalysis(); AA = &getAnalysis(); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : nullptr; bool MadeChange, EverMadeChange = false; @@ -193,7 +197,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst, if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. - if (!isSafeToSpeculativelyExecute(Inst)) + if (!isSafeToSpeculativelyExecute(Inst, DL)) return false; // We don't want to sink across a critical edge if we don't dominate the From c1a6d65d21545ad1622e2f8041c6a55756b71599 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 10 Jul 2014 16:26:10 +0000 Subject: [PATCH 0752/1155] IR: Aliases don't belong to an explicit comdat Aliases inherit their comdat from their aliasee, they don't have an explicit comdat. This fixes PR20279. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212732 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/AsmWriter.cpp | 5 ----- test/Feature/comdat.ll | 3 +++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 87d26b164bb5..a7499bc09b30 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -1528,11 +1528,6 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { writeOperand(Aliasee, !isa(Aliasee)); } - if (GA->hasComdat()) { - Out << ", comdat "; - PrintLLVMName(Out, GA->getComdat()->getName(), ComdatPrefix); - } - printInfoComment(*GA); Out << '\n'; } diff --git a/test/Feature/comdat.ll b/test/Feature/comdat.ll index 6b94a418582f..05fb87c648ee 100644 --- a/test/Feature/comdat.ll +++ b/test/Feature/comdat.ll @@ -9,6 +9,9 @@ $f2 = comdat any @v = global i32 0, comdat $f ; CHECK: @v = global i32 0, comdat $f +@a = alias i32* @v +; CHECK: @a = alias i32* @v{{$}} + define void @f() comdat $f { ret void } From 5b8419d32e09e66bc9ca00ef24a204f7ac764f9d Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Thu, 10 Jul 2014 17:13:27 +0000 Subject: [PATCH 0753/1155] InstCombine: Fix a crash in Descale for multiply-by-zero Fix a crash in `InstCombiner::Descale()` when a multiply-by-zero gets created as an argument to a GEP partway through an iteration, causing -instcombine to optimize the GEP before the multiply. 
rdar://problem/17615671 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212742 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstructionCombining.cpp | 6 ++++++ test/Transforms/InstCombine/descale-zero.ll | 21 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 test/Transforms/InstCombine/descale-zero.ll diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 46e3bfc7e44e..08e24461a610 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1120,6 +1120,12 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { return nullptr; } + // If Op is zero then Val = Op * Scale. + if (match(Op, m_Zero())) { + NoSignedWrap = true; + return Op; + } + // We know that we can successfully descale, so from here on we can safely // modify the IR. Op holds the descaled version of the deepest term in the // expression. 
NoSignedWrap is 'true' if multiplying Op by Scale is known diff --git a/test/Transforms/InstCombine/descale-zero.ll b/test/Transforms/InstCombine/descale-zero.ll new file mode 100644 index 000000000000..7990fdb3eca3 --- /dev/null +++ b/test/Transforms/InstCombine/descale-zero.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +define internal i8* @descale_zero() { +entry: +; CHECK: load i16** inttoptr (i64 48 to i16**), align 16 +; CHECK-NEXT: bitcast i16* +; CHECK-NEXT: ret i8* + %i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16 + %num = load i64* inttoptr (i64 64 to i64*), align 64 + %num_times_2 = shl i64 %num, 1 + %num_times_2_plus_4 = add i64 %num_times_2, 4 + %i8_ptr = bitcast i16* %i16_ptr to i8* + %i8_ptr_num_times_2_plus_4 = getelementptr i8* %i8_ptr, i64 %num_times_2_plus_4 + %num_times_neg2 = mul i64 %num, -2 + %num_times_neg2_minus_4 = add i64 %num_times_neg2, -4 + %addr = getelementptr i8* %i8_ptr_num_times_2_plus_4, i64 %num_times_neg2_minus_4 + ret i8* %addr +} From 282eda9b744f6e685495c8908dd9c034c4588a71 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 10 Jul 2014 17:26:51 +0000 Subject: [PATCH 0754/1155] Make it possible for the Subtarget to change between function passes in the mips back end. This, unfortunately, required a bit of churn in the various predicates to use a pointer rather than a reference. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212744 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 6 +-- lib/Target/Mips/MipsDSPInstrFormats.td | 4 +- lib/Target/Mips/MipsFastISel.cpp | 8 ++-- lib/Target/Mips/MipsISelDAGToDAG.cpp | 3 +- lib/Target/Mips/MipsISelDAGToDAG.h | 4 +- lib/Target/Mips/MipsInstrFPU.td | 8 ++-- lib/Target/Mips/MipsInstrInfo.td | 54 +++++++++++++------------- lib/Target/Mips/MipsMSAInstrFormats.td | 2 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 24 ++++++------ 9 files changed, 57 insertions(+), 56 deletions(-) diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 4e86a27e323c..6672aef110a3 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -37,7 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-isel" bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - if (!Subtarget.inMips16Mode()) + if (!Subtarget->inMips16Mode()) return false; return MipsDAGToDAGISel::runOnMachineFunction(MF); } @@ -226,9 +226,9 @@ bool Mips16DAGToDAGISel::selectAddr16( const LSBaseSDNode *LS = dyn_cast(Parent); if (LS) { - if (LS->getMemoryVT() == MVT::f32 && Subtarget.hasMips4_32r2()) + if (LS->getMemoryVT() == MVT::f32 && Subtarget->hasMips4_32r2()) return false; - if (LS->getMemoryVT() == MVT::f64 && Subtarget.hasMips4_32r2()) + if (LS->getMemoryVT() == MVT::f64 && Subtarget->hasMips4_32r2()) return false; } } diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td index cf09113cd8ad..b5d52ced9d3d 100644 --- a/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/lib/Target/Mips/MipsDSPInstrFormats.td @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -def HasDSP : Predicate<"Subtarget.hasDSP()">, +def HasDSP : Predicate<"Subtarget->hasDSP()">, AssemblerPredicate<"FeatureDSP">; -def HasDSPR2 : Predicate<"Subtarget.hasDSPR2()">, 
+def HasDSPR2 : Predicate<"Subtarget->hasDSPR2()">, AssemblerPredicate<"FeatureDSPR2">; // Fields. diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 375d9b205542..617801ba8c67 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -41,7 +41,7 @@ class MipsFastISel final : public FastISel { const TargetMachine &TM; const TargetInstrInfo &TII; const TargetLowering &TLI; - const MipsSubtarget &Subtarget; + const MipsSubtarget *Subtarget; MipsFunctionInfo *MFI; // Convenience variables to avoid some queries. @@ -56,11 +56,11 @@ class MipsFastISel final : public FastISel { M(const_cast(*funcInfo.Fn->getParent())), TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - Subtarget(TM.getSubtarget()) { + Subtarget(&TM.getSubtarget()) { MFI = funcInfo.MF->getInfo(); Context = &funcInfo.Fn->getContext(); - TargetSupported = ((Subtarget.getRelocationModel() == Reloc::PIC_) && - (Subtarget.hasMips32r2() && (Subtarget.isABI_O32()))); + TargetSupported = ((Subtarget->getRelocationModel() == Reloc::PIC_) && + (Subtarget->hasMips32r2() && (Subtarget->isABI_O32()))); } bool TargetSelectInstruction(const Instruction *I) override; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 90cff631931f..0bdabf37f63b 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -47,6 +47,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + Subtarget = &TM.getSubtarget(); bool Ret = SelectionDAGISel::runOnMachineFunction(MF); processFunctionAfterISel(MF); @@ -202,7 +203,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { #ifndef NDEBUG case ISD::LOAD: case ISD::STORE: - assert((Subtarget.systemSupportsUnalignedAccess() || + assert((Subtarget->systemSupportsUnalignedAccess() || 
cast(Node)->getMemoryVT().getSizeInBits() / 8 <= cast(Node)->getAlignment()) && "Unexpected unaligned loads/stores."); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index 13becb6b5bb9..2a6c875f4e35 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -32,7 +32,7 @@ namespace llvm { class MipsDAGToDAGISel : public SelectionDAGISel { public: explicit MipsDAGToDAGISel(MipsTargetMachine &TM) - : SelectionDAGISel(TM), Subtarget(TM.getSubtarget()) {} + : SelectionDAGISel(TM), Subtarget(&TM.getSubtarget()) {} // Pass Name const char *getPassName() const override { @@ -46,7 +46,7 @@ class MipsDAGToDAGISel : public SelectionDAGISel { /// Keep a pointer to the MipsSubtarget around so that we can make the right /// decision when generating code for different targets. - const MipsSubtarget &Subtarget; + const MipsSubtarget *Subtarget; private: // Include the pieces autogenerated from the target description. diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index aa8f053327f3..2260d53567f6 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -57,13 +57,13 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in // Feature predicates. 
//===----------------------------------------------------------------------===// -def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, +def IsFP64bit : Predicate<"Subtarget->isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">; -def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, +def NotFP64bit : Predicate<"!Subtarget->isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">; -def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, +def IsSingleFloat : Predicate<"Subtarget->isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">; -def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, +def IsNotSingleFloat : Predicate<"!Subtarget->isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">; //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index d3dd87836c01..8e9472c2d9d8 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -146,61 +146,61 @@ def MipsSDR : SDNode<"MipsISD::SDR", SDTStore, //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// -def HasMips2 : Predicate<"Subtarget.hasMips2()">, +def HasMips2 : Predicate<"Subtarget->hasMips2()">, AssemblerPredicate<"FeatureMips2">; -def HasMips3_32 : Predicate<"Subtarget.hasMips3_32()">, +def HasMips3_32 : Predicate<"Subtarget->hasMips3_32()">, AssemblerPredicate<"FeatureMips3_32">; -def HasMips3_32r2 : Predicate<"Subtarget.hasMips3_32r2()">, +def HasMips3_32r2 : Predicate<"Subtarget->hasMips3_32r2()">, AssemblerPredicate<"FeatureMips3_32r2">; -def HasMips3 : Predicate<"Subtarget.hasMips3()">, +def HasMips3 : Predicate<"Subtarget->hasMips3()">, AssemblerPredicate<"FeatureMips3">; -def HasMips4_32 : Predicate<"Subtarget.hasMips4_32()">, +def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">, AssemblerPredicate<"FeatureMips4_32">; -def HasMips4_32r2 : Predicate<"Subtarget.hasMips4_32r2()">, +def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">, AssemblerPredicate<"FeatureMips4_32r2">; -def HasMips5_32r2 : Predicate<"Subtarget.hasMips5_32r2()">, +def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">, AssemblerPredicate<"FeatureMips5_32r2">; -def HasMips32 : Predicate<"Subtarget.hasMips32()">, +def HasMips32 : Predicate<"Subtarget->hasMips32()">, AssemblerPredicate<"FeatureMips32">; -def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">, +def HasMips32r2 : Predicate<"Subtarget->hasMips32r2()">, AssemblerPredicate<"FeatureMips32r2">; -def HasMips32r6 : Predicate<"Subtarget.hasMips32r6()">, +def HasMips32r6 : Predicate<"Subtarget->hasMips32r6()">, AssemblerPredicate<"FeatureMips32r6">; -def NotMips32r6 : Predicate<"!Subtarget.hasMips32r6()">, +def NotMips32r6 : Predicate<"!Subtarget->hasMips32r6()">, AssemblerPredicate<"!FeatureMips32r6">; -def IsGP64bit : Predicate<"Subtarget.isGP64bit()">, +def IsGP64bit : Predicate<"Subtarget->isGP64bit()">, AssemblerPredicate<"FeatureGP64Bit">; -def IsGP32bit : Predicate<"!Subtarget.isGP64bit()">, +def IsGP32bit : 
Predicate<"!Subtarget->isGP64bit()">, AssemblerPredicate<"!FeatureGP64Bit">; -def HasMips64 : Predicate<"Subtarget.hasMips64()">, +def HasMips64 : Predicate<"Subtarget->hasMips64()">, AssemblerPredicate<"FeatureMips64">; -def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">, +def HasMips64r2 : Predicate<"Subtarget->hasMips64r2()">, AssemblerPredicate<"FeatureMips64r2">; -def HasMips64r6 : Predicate<"Subtarget.hasMips64r6()">, +def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">, AssemblerPredicate<"FeatureMips64r6">; -def NotMips64r6 : Predicate<"!Subtarget.hasMips64r6()">, +def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">, AssemblerPredicate<"!FeatureMips64r6">; -def IsN64 : Predicate<"Subtarget.isABI_N64()">, +def IsN64 : Predicate<"Subtarget->isABI_N64()">, AssemblerPredicate<"FeatureN64">; -def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">, +def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">, AssemblerPredicate<"FeatureMips16">; -def HasCnMips : Predicate<"Subtarget.hasCnMips()">, +def HasCnMips : Predicate<"Subtarget->hasCnMips()">, AssemblerPredicate<"FeatureCnMips">; def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, AssemblerPredicate<"FeatureMips32">; def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, AssemblerPredicate<"FeatureMips32">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; -def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">, +def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; -def NotDSP : Predicate<"!Subtarget.hasDSP()">; -def InMicroMips : Predicate<"Subtarget.inMicroMipsMode()">, +def NotDSP : Predicate<"!Subtarget->hasDSP()">; +def InMicroMips : Predicate<"Subtarget->inMicroMipsMode()">, AssemblerPredicate<"FeatureMicroMips">; -def NotInMicroMips : Predicate<"!Subtarget.inMicroMipsMode()">, +def NotInMicroMips : Predicate<"!Subtarget->inMicroMipsMode()">, AssemblerPredicate<"!FeatureMicroMips">; -def IsLE : 
Predicate<"Subtarget.isLittle()">; -def IsBE : Predicate<"!Subtarget.isLittle()">; -def IsNotNaCl : Predicate<"!Subtarget.isTargetNaCl()">; +def IsLE : Predicate<"Subtarget->isLittle()">; +def IsBE : Predicate<"!Subtarget->isLittle()">; +def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; //===----------------------------------------------------------------------===// // Mips GPR size adjectives. diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td index 6bd0366b52e7..bff2d0fab1ec 100644 --- a/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -def HasMSA : Predicate<"Subtarget.hasMSA()">, +def HasMSA : Predicate<"Subtarget->hasMSA()">, AssemblerPredicate<"FeatureMSA">; class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index d5385be7ddb0..6f35947f8f01 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -37,7 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-isel" bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - if (Subtarget.inMips16Mode()) + if (Subtarget->inMips16Mode()) return false; return MipsDAGToDAGISel::runOnMachineFunction(MF); } @@ -134,7 +134,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC; - if (Subtarget.isABI_N64()) + if (Subtarget->isABI_N64()) RC = (const TargetRegisterClass*)&Mips::GPR64RegClass; else RC = (const TargetRegisterClass*)&Mips::GPR32RegClass; @@ -142,7 +142,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { V0 = RegInfo.createVirtualRegister(RC); V1 = RegInfo.createVirtualRegister(RC); - if (Subtarget.isABI_N64()) { + if 
(Subtarget->isABI_N64()) { MF.getRegInfo().addLiveIn(Mips::T9_64); MBB.addLiveIn(Mips::T9_64); @@ -174,7 +174,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MF.getRegInfo().addLiveIn(Mips::T9); MBB.addLiveIn(Mips::T9); - if (Subtarget.isABI_N32()) { + if (Subtarget->isABI_N32()) { // lui $v0, %hi(%neg(%gp_rel(fname))) // addu $v1, $v0, $t9 // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) @@ -187,7 +187,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { return; } - assert(Subtarget.isABI_O32()); + assert(Subtarget->isABI_O32()); // For O32 ABI, the following instruction sequence is emitted to initialize // the global base register: @@ -408,7 +408,7 @@ bool MipsSEDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base, // * MSA is enabled // * N is a ISD::BUILD_VECTOR representing a constant splat bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { - if (!Subtarget.hasMSA()) + if (!Subtarget->hasMSA()) return false; BuildVectorSDNode *Node = dyn_cast(N); @@ -422,7 +422,7 @@ bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, - !Subtarget.isLittle())) + !Subtarget->isLittle())) return false; Imm = SplatValue; @@ -648,7 +648,7 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } case ISD::ADDE: { - if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC. + if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC. 
break; SDValue InFlag = Node->getOperand(2); Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); @@ -658,11 +658,11 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { case ISD::ConstantFP: { ConstantFPSDNode *CN = dyn_cast(Node); if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - if (Subtarget.isGP64bit()) { + if (Subtarget->isGP64bit()) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, Mips::ZERO_64, MVT::i64); Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero); - } else if (Subtarget.isFP64bit()) { + } else if (Subtarget->isFP64bit()) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, Mips::ZERO, MVT::i32); Result = CurDAG->getMachineNode(Mips::BuildPairF64_64, DL, MVT::f64, @@ -813,12 +813,12 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { EVT ResVecTy = BVN->getValueType(0); EVT ViaVecTy; - if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector()) + if (!Subtarget->hasMSA() || !BVN->getValueType(0).is128BitVector()) return std::make_pair(false, nullptr); if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, - !Subtarget.isLittle())) + !Subtarget->isLittle())) return std::make_pair(false, nullptr); switch (SplatBitSize) { From b5a006e88f78d0c2e88aed1941062951827870ef Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 10 Jul 2014 17:26:54 +0000 Subject: [PATCH 0755/1155] Add the CSR company and the Kalimba DSP processor to Triple. Patch by Matthew Gardiner with fixes by me. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212745 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/Triple.h | 6 ++++-- lib/Support/Triple.cpp | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index a2e841162c6e..2867a0ea8a3f 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -76,7 +76,8 @@ class Triple { le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten) amdil, // amdil: amd IL spir, // SPIR: standard portable IR for OpenCL 32-bit version - spir64 // SPIR: standard portable IR for OpenCL 64-bit version + spir64, // SPIR: standard portable IR for OpenCL 64-bit version + kalimba // Kalimba: generic kalimba }; enum VendorType { UnknownVendor, @@ -89,7 +90,8 @@ class Triple { Freescale, IBM, ImaginationTechnologies, - NVIDIA + NVIDIA, + CSR }; enum OSType { UnknownOS, diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 445502d33204..b74ee13f587d 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -50,6 +50,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case amdil: return "amdil"; case spir: return "spir"; case spir64: return "spir64"; + case kalimba: return "kalimba"; } llvm_unreachable("Invalid ArchType!"); @@ -101,6 +102,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case amdil: return "amdil"; case spir: return "spir"; case spir64: return "spir"; + case kalimba: return "kalimba"; } } @@ -117,6 +119,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case IBM: return "ibm"; case ImaginationTechnologies: return "img"; case NVIDIA: return "nvidia"; + case CSR: return "csr"; } llvm_unreachable("Invalid VendorType!"); @@ -208,6 +211,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("amdil", amdil) .Case("spir", spir) .Case("spir64", spir64) + .Case("kalimba", kalimba) .Default(UnknownArch); } @@ -281,6 +285,7 @@ static Triple::ArchType 
parseArch(StringRef ArchName) { .Case("amdil", Triple::amdil) .Case("spir", Triple::spir) .Case("spir64", Triple::spir64) + .Case("kalimba", Triple::kalimba) .Default(Triple::UnknownArch); } @@ -295,6 +300,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("ibm", Triple::IBM) .Case("img", Triple::ImaginationTechnologies) .Case("nvidia", Triple::NVIDIA) + .Case("csr", Triple::CSR) .Default(Triple::UnknownVendor); } @@ -800,6 +806,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::x86: case llvm::Triple::xcore: case llvm::Triple::spir: + case llvm::Triple::kalimba: return 32; case llvm::Triple::arm64: @@ -851,6 +858,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::arm: case Triple::armeb: case Triple::hexagon: + case Triple::kalimba: case Triple::le32: case Triple::mips: case Triple::mipsel: @@ -885,6 +893,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::arm: case Triple::armeb: case Triple::hexagon: + case Triple::kalimba: case Triple::le32: case Triple::msp430: case Triple::r600: From 60079c18bc5f7100423784da38e757febad583a1 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 10 Jul 2014 18:00:53 +0000 Subject: [PATCH 0756/1155] [X86] Mark pseudo instruction TEST8ri_NOEREX as hasSIdeEffects=0. Also, add a case clause in X86InstrInfo::shouldScheduleAdjacent to enable macro-fusion. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212747 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 6 ++++-- lib/Target/X86/X86InstrInfo.cpp | 1 + test/CodeGen/X86/testb-je-fusion.ll | 20 ++++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/testb-je-fusion.ll diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 368e14b91f40..f2574cc3700e 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1278,8 +1278,10 @@ let isCompare = 1 in { def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>; // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the - // register class is constrained to GR8_NOREX. - let isPseudo = 1 in + // register class is constrained to GR8_NOREX. This pseudo is explicitly + // marked side-effect free, since it doesn't have an isel pattern like + // other test instructions. + let isPseudo = 1, hasSideEffects = 0 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } // Defs = [EFLAGS] diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 0797fc6041b1..0d3afc43c2bb 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -5037,6 +5037,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, case X86::TEST16rm: case X86::TEST32rm: case X86::TEST64rm: + case X86::TEST8ri_NOREX: case X86::AND16i16: case X86::AND16ri: case X86::AND16ri8: diff --git a/test/CodeGen/X86/testb-je-fusion.ll b/test/CodeGen/X86/testb-je-fusion.ll new file mode 100644 index 000000000000..9e946ae4ca33 --- /dev/null +++ b/test/CodeGen/X86/testb-je-fusion.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s + +; testb should be scheduled right before je to enable macro-fusion. 
+ +; CHECK: testb $2, %{{[abcd]}}h +; CHECK-NEXT: je + +define i32 @check_flag(i32 %flags, ...) nounwind { +entry: + %and = and i32 %flags, 512 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: + br label %if.end + +if.end: + %hasflag = phi i32 [ 1, %if.then ], [ 0, %entry ] + ret i32 %hasflag +} From 3996e5290a60e24e02381830b02f1105980428db Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 10 Jul 2014 18:04:55 +0000 Subject: [PATCH 0757/1155] [DAG] Further improve the logic in DAGCombiner that folds a pair of shuffles into a single shuffle if the resulting mask is legal. This patch teaches the DAGCombiner how to fold shuffles according to the following new rules: 1. shuffle(shuffle(x, y), undef) -> x 2. shuffle(shuffle(x, y), undef) -> y 3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef) 4. shuffle(shuffle(x, y), undef) -> shuffle(y, undef) The backend avoids to combine shuffles according to rules 3. and 4. if the resulting shuffle does not have a legal mask. This is to avoid introducing illegal shuffles that are potentially expanded into a sub-optimal sequence of target specific dag nodes during vector legalization. Added test case combine-vec-shuffle-2.ll to verify that we correctly triggers the new rules when combining shuffles. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212748 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 65 +++++++-- test/CodeGen/X86/combine-vec-shuffle-2.ll | 164 ++++++++++++++++++++++ test/CodeGen/X86/widen_shuffle-1.ll | 4 +- 3 files changed, 218 insertions(+), 15 deletions(-) create mode 100644 test/CodeGen/X86/combine-vec-shuffle-2.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f6077b07d8d4..7c42e4d3d218 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10677,17 +10677,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If this shuffle node is simply a swizzle of another shuffle node, - // and it reverses the swizzle of the previous shuffle then we can - // optimize shuffle(shuffle(x, undef), undef) -> x. + // then try to simplify it. if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && N1.getOpcode() == ISD::UNDEF) { ShuffleVectorSDNode *OtherSV = cast(N0); - // Shuffle nodes can only reverse shuffles with a single non-undef value. - if (N0.getOperand(1).getOpcode() != ISD::UNDEF) - return SDValue(); - // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(OtherSV->getOperand(0).getValueType() == VT && @@ -10704,27 +10699,69 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Idx = OtherSV->getMaskElt(Idx); Mask.push_back(Idx); } + + bool CommuteOperands = false; + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) { + // To be valid, the combine shuffle mask should only reference elements + // from one of the two vectors in input to the inner shufflevector. + bool IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // See if the combined mask only reference undefs or elements coming + // from the first shufflevector operand. 
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts; + + if (!IsValidMask) { + IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // Check that all the elements come from the second shuffle operand. + IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts; + CommuteOperands = IsValidMask; + } + + // Early exit if the combined shuffle mask is not valid. + if (!IsValidMask) + return SDValue(); + } + // See if this pair of shuffles can be safely folded according to either + // of the following rules: + // shuffle(shuffle(x, y), undef) -> x + // shuffle(shuffle(x, undef), undef) -> x + // shuffle(shuffle(x, y), undef) -> y bool IsIdentityMask = true; + unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0; for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) { // Skip Undefs. if (Mask[i] < 0) continue; // The combined shuffle must map each index to itself. - IsIdentityMask = (unsigned)Mask[i] == i; - } - - if (IsIdentityMask) - // optimize shuffle(shuffle(x, undef), undef) -> x. + IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; + } + + if (IsIdentityMask) { + if (CommuteOperands) + // optimize shuffle(shuffle(x, y), undef) -> y. + return OtherSV->getOperand(1); + + // optimize shuffle(shuffle(x, undef), undef) -> x + // optimize shuffle(shuffle(x, y), undef) -> x return OtherSV->getOperand(0); + } // It may still be beneficial to combine the two shuffles if the // resulting shuffle is legal. - // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). - if (TLI.isShuffleMaskLegal(Mask, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, + if (TLI.isShuffleMaskLegal(Mask, VT)) { + if (!CommuteOperands) + // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). 
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, + &Mask[0]); + + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), &Mask[0]); + } } return SDValue(); diff --git a/test/CodeGen/X86/combine-vec-shuffle-2.ll b/test/CodeGen/X86/combine-vec-shuffle-2.ll new file mode 100644 index 000000000000..7ab7f8091250 --- /dev/null +++ b/test/CodeGen/X86/combine-vec-shuffle-2.ll @@ -0,0 +1,164 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s + +; Check that DAGCombiner correctly folds the following pairs of shuffles +; using the following rules: +; 1. shuffle(shuffle(x, y), undef) -> x +; 2. shuffle(shuffle(x, y), undef) -> y +; 3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef) +; 4. shuffle(shuffle(x, y), undef) -> shuffle(undef, y) +; +; Rules 3. and 4. are used only if the resulting shuffle mask is legal. 
+ +define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test1 +; Mask: [3,0,0,1] +; CHECK: pshufd $67 +; CHECK-NEXT: ret + + +define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test2 +; Mask: [2,0,0,3] +; CHECK: pshufd $-62 +; CHECK-NEXT: ret + + +define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test3 +; Mask: [2,0,0,3] +; CHECK: pshufd $-62 +; CHECK-NEXT: ret + + +define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test4 +; Mask: [0,0,0,1] +; CHECK: pshufd $64 +; CHECK-NEXT: ret + + +define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test5 +; Mask: [1,1] +; CHECK: movhlps +; CHECK-NEXT: ret + + +define <4 x i32> @test6(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test6 +; Mask: [2,0,0,0] +; CHECK: pshufd $2 +; CHECK-NEXT: ret + + +define <4 x i32> @test7(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test7 +; Mask: [0,2,0,2] +; CHECK: pshufd $-120 +; CHECK-NEXT: ret + + +define <4 x i32> 
@test8(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test8 +; Mask: [1,0,3,0] +; CHECK: pshufd $49 +; CHECK-NEXT: ret + + +define <4 x i32> @test9(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test9 +; Mask: [1,3,0,2] +; CHECK: pshufd $-115 +; CHECK-NEXT: ret + + +define <4 x i32> @test10(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test10 +; Mask: [1,0,1,0] +; CHECK: pshufd $17 +; CHECK-NEXT: ret + + +define <4 x i32> @test11(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test11 +; Mask: [1,0,2,1] +; CHECK: pshufd $97 +; CHECK-NEXT: ret + + +define <4 x i32> @test12(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test12 +; Mask: [0,0,0,0] +; CHECK: pshufd $0 +; CHECK-NEXT: ret + + +; The following pair of shuffles is folded into vector %A. +define <4 x i32> @test13(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test13 +; CHECK-NOT: pshufd +; CHECK: ret + + +; The following pair of shuffles is folded into vector %B. 
+define <4 x i32> @test14(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test14 +; CHECK-NOT: pshufd +; CHECK: ret + diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index 803402b1f1f4..a355b75fafcf 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -33,7 +33,9 @@ entry: define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { entry: ; CHECK-LABEL: shuf3: -; CHECK: shufps +; CHECK-NOT: movlhps +; CHECK-NOT: shufps +; CHECK: pshufd %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> From b730c3d28dc940fba7720aeb5eb1ca52088a6177 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 10 Jul 2014 18:21:04 +0000 Subject: [PATCH 0758/1155] Revert "Revert r212640, "Add trunc (select c, a, b) -> select c (trunc a), (trunc b) combine."" Don't try to convert the select condition type. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212750 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 +++++++++ test/CodeGen/R600/select64.ll | 35 ++++++++++++++++++++++++ test/CodeGen/X86/select.ll | 8 ++++++ test/CodeGen/X86/shift-parts.ll | 4 +-- 4 files changed, 58 insertions(+), 2 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7c42e4d3d218..707e48add407 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6022,6 +6022,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // trunc (select c, a, b) -> select c, (trunc a), (trunc b) + if (N0.getOpcode() == ISD::SELECT) { + EVT SrcVT = N0.getValueType(); + if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && + TLI.isTruncateFree(SrcVT, VT)) { + SDLoc SL(N0); + SDValue Cond = N0.getOperand(0); + SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); + SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); + } + } + // Fold a series of buildvector, bitcast, and truncate if possible. 
// For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to diff --git a/test/CodeGen/R600/select64.ll b/test/CodeGen/R600/select64.ll index 6b87d9865ad6..dba25e3bd21e 100644 --- a/test/CodeGen/R600/select64.ll +++ b/test/CodeGen/R600/select64.ll @@ -13,3 +13,38 @@ entry: store i64 %1, i64 addrspace(1)* %out ret void } + +; CHECK-LABEL: @select_trunc_i64 +; CHECK: V_CNDMASK_B32 +; CHECK-NOT: V_CNDMASK_B32 +define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind { + %cmp = icmp ugt i32 %cond, 5 + %sel = select i1 %cmp, i64 0, i64 %in + %trunc = trunc i64 %sel to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: @select_trunc_i64_2 +; CHECK: V_CNDMASK_B32 +; CHECK-NOT: V_CNDMASK_B32 +define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind { + %cmp = icmp ugt i32 %cond, 5 + %sel = select i1 %cmp, i64 %a, i64 %b + %trunc = trunc i64 %sel to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: @v_select_trunc_i64_2 +; CHECK: V_CNDMASK_B32 +; CHECK-NOT: V_CNDMASK_B32 +define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { + %cmp = icmp ugt i32 %cond, 5 + %a = load i64 addrspace(1)* %aptr, align 8 + %b = load i64 addrspace(1)* %bptr, align 8 + %sel = select i1 %cmp, i64 %a, i64 %b + %trunc = trunc i64 %sel to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index cdd258d92031..654e8652cfcb 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -357,3 +357,11 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind { ; ATOM: cmpl $15, %edi ; ATOM: cmovgel %edx } + +; CHECK-LABEL: @trunc_select_miscompile +; CHECK-NOT: sarb +define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) { + %tmp1 = select i1 %cc, i32 3, 
i32 2 + %tmp2 = shl i32 %a, %tmp1 + ret i32 %tmp2 +} \ No newline at end of file diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll index ddad307715a4..763da6397101 100644 --- a/test/CodeGen/X86/shift-parts.ll +++ b/test/CodeGen/X86/shift-parts.ll @@ -7,13 +7,13 @@ ; CHECK: shrdq -define i32 @int87(i32 %uint64p_8) nounwind { +define i32 @int87(i32 %uint64p_8, i1 %cond) nounwind { entry: %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; [#uses=1] br label %for.cond for.cond: ; preds = %for.cond, %entry - %call3.in.in.in.v = select i1 undef, i320 192, i320 128 ; [#uses=1] + %call3.in.in.in.v = select i1 %cond, i320 192, i320 128 ; [#uses=1] %call3.in.in.in = lshr i320 %srcval4, %call3.in.in.in.v ; [#uses=1] %call3.in = trunc i320 %call3.in.in.in to i32 ; [#uses=1] %tobool = icmp eq i32 %call3.in, 0 ; [#uses=1] From 4d3b6131c790268f22514b572e453f4648ad50a8 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 10 Jul 2014 18:59:41 +0000 Subject: [PATCH 0759/1155] Extend the test coverage in combine-vec-shuffle-2.ll adding some negative tests. Add test cases where we don't expect to trigger the combine optimizations introduced at revision 212748. No functional change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212756 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/combine-vec-shuffle-2.ll | 89 +++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/test/CodeGen/X86/combine-vec-shuffle-2.ll b/test/CodeGen/X86/combine-vec-shuffle-2.ll index 7ab7f8091250..877d38260d61 100644 --- a/test/CodeGen/X86/combine-vec-shuffle-2.ll +++ b/test/CodeGen/X86/combine-vec-shuffle-2.ll @@ -162,3 +162,92 @@ define <4 x i32> @test14(<4 x i32> %A, <4 x i32> %B) { ; CHECK-NOT: pshufd ; CHECK: ret + +; Verify that we don't optimize the following cases. We expect more than one shuffle. 
+ +define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test15 +; CHECK: shufps $114 +; CHECK-NEXT: pshufd $-58 +; CHECK-NEXT: ret + + +define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test16 +; CHECK: blendps $10 +; CHECK-NEXT: pshufd $-58 +; CHECK-NEXT: ret + + +define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test17 +; CHECK: shufps $120 +; CHECK-NEXT: pshufd $-58 +; CHECK-NEXT: ret + + +define <4 x i32> @test18(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test18 +; CHECK: blendps $11 +; CHECK-NEXT: pshufd $-59 +; CHECK-NEXT: ret + +define <4 x i32> @test19(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test19 +; CHECK: shufps $-104 +; CHECK-NEXT: pshufd $2 +; CHECK-NEXT: ret + + +define <4 x i32> @test20(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test20 +; CHECK: shufps $11 +; CHECK-NEXT: pshufd $-58 +; CHECK-NEXT: ret + + +define <4 x i32> @test21(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test21 +; CHECK: 
shufps $120 +; CHECK-NEXT: pshufd $-60 +; CHECK-NEXT: ret + + +define <4 x i32> @test22(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test22 +; CHECK: blendps $11 +; CHECK-NEXT: pshufd $-43 +; CHECK-NEXT: ret + From fc6a659676e95e11b086ce69ebfdcb6343a242e3 Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Thu, 10 Jul 2014 22:23:30 +0000 Subject: [PATCH 0760/1155] [mips] Emit two CFI offset directives per double precision SDC1/LDC1 instead of just one for FR=1 registers Differential Revision: http://reviews.llvm.org/D4310 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212769 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCallingConv.td | 9 +++-- lib/Target/Mips/MipsSEFrameLowering.cpp | 16 ++++++++ test/CodeGen/Mips/cconv/return-hard-float.ll | 13 +++++++ test/CodeGen/Mips/cfi_offset.ll | 41 ++++++++++++++++++++ 4 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/Mips/cfi_offset.ll diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 007213c848f8..85704b9917a3 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -26,9 +26,9 @@ def RetCC_MipsO32 : CallingConv<[ // f32 are returned in registers F0, F2 CCIfType<[f32], CCAssignToReg<[F0, F2]>>, - // f64 arguments are returned in D0_64 and D1_64 in FP64bit mode or + // f64 arguments are returned in D0_64 and D2_64 in FP64bit mode or // in D0 and D1 in FP32bit mode. 
- CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[D0_64, D1_64]>>>, + CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[D0_64, D2_64]>>>, CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()", CCAssignToReg<[D0, D1]>>> ]>; @@ -247,8 +247,9 @@ def CSR_O32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP, def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP, (sequence "S%u", 7, 0))>; -def CSR_O32_FP64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 20), RA, FP, - (sequence "S%u", 7, 0))>; +def CSR_O32_FP64 : + CalleeSavedRegs<(add (decimate (sequence "D%u_64", 30, 20), 2), RA, FP, + (sequence "S%u", 7, 0))>; def CSR_N32 : CalleeSavedRegs<(add D20_64, D22_64, D24_64, D26_64, D28_64, D30_64, RA_64, FP_64, GP_64, diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 6573070f7031..42fc8d13ce01 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -343,6 +343,22 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + } else if (Mips::FGR64RegClass.contains(Reg)) { + unsigned Reg0 = MRI->getDwarfRegNum(Reg, true); + unsigned Reg1 = MRI->getDwarfRegNum(Reg, true) + 1; + + if (!STI.isLittle()) + std::swap(Reg0, Reg1); + + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg0, Offset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } else { // Reg is either in GPR32 or FGR32. 
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( diff --git a/test/CodeGen/Mips/cconv/return-hard-float.ll b/test/CodeGen/Mips/cconv/return-hard-float.ll index 371b3a54598b..3eb26fa9d24f 100644 --- a/test/CodeGen/Mips/cconv/return-hard-float.ll +++ b/test/CodeGen/Mips/cconv/return-hard-float.ll @@ -10,6 +10,9 @@ ; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s ; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s +; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=ALL --check-prefix=032FP64 %s +; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=ALL --check-prefix=032FP64 %s + ; Test the float returns for all ABI's and byte orders as specified by ; section 5 of MD00305 (MIPS ABIs Described). 
@@ -44,3 +47,13 @@ entry: ; N32-DAG: ldc1 $f0, %lo(double)([[R1:\$[0-9]+]]) ; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)($1) ; N64-DAG: ldc1 $f0, 0([[R1]]) + +define { double, double } @retComplexDouble() #0 { + %retval = alloca { double, double }, align 8 + %1 = load { double, double }* %retval + ret { double, double } %1 +} + +; ALL-LABEL: retComplexDouble: +; 032FP64-DAG: ldc1 $f0, 0($sp) +; 032FP64-DAG: ldc1 $f2, 8($sp) diff --git a/test/CodeGen/Mips/cfi_offset.ll b/test/CodeGen/Mips/cfi_offset.ll new file mode 100644 index 000000000000..e23855bd65d2 --- /dev/null +++ b/test/CodeGen/Mips/cfi_offset.ll @@ -0,0 +1,41 @@ +; RUN: llc -march=mips -mattr=+o32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EB +; RUN: llc -march=mipsel -mattr=+o32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EL +; RUN: llc -march=mips -mattr=+o32,+fpxx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EB +; RUN: llc -march=mipsel -mattr=+o32,+fpxx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EL +; RUN: llc -march=mips -mattr=+o32,+fp64 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EB +; RUN: llc -march=mipsel -mattr=+o32,+fp64 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EL + +@var = global double 0.0 + +declare void @foo(...) 
+ +define void @bar() { + +; CHECK-LABEL: bar: + +; CHECK: .cfi_def_cfa_offset 40 +; CHECK: sdc1 $f22, 32($sp) +; CHECK: sdc1 $f20, 24($sp) +; CHECK: sw $ra, 20($sp) +; CHECK: sw $16, 16($sp) + +; CHECK-EB: .cfi_offset 55, -8 +; CHECK-EB: .cfi_offset 54, -4 +; CHECK-EB: .cfi_offset 53, -16 +; CHECK-EB: .cfi_offset 52, -12 + +; CHECK-EL: .cfi_offset 54, -8 +; CHECK-EL: .cfi_offset 55, -4 +; CHECK-EL: .cfi_offset 52, -16 +; CHECK-EL: .cfi_offset 53, -12 + +; CHECK: .cfi_offset 31, -20 +; CHECK: .cfi_offset 16, -24 + + %val1 = load volatile double* @var + %val2 = load volatile double* @var + call void (...)* @foo() nounwind + store volatile double %val1, double* @var + store volatile double %val2, double* @var + ret void +} From e39f7f95bc3ff6e422e4b644cd42f1258bada0e8 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Thu, 10 Jul 2014 22:37:28 +0000 Subject: [PATCH 0761/1155] Use the integrated assembler by default on OpenBSD. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212771 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 6875fc653541..df66ca9006b8 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -43,7 +43,8 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; - if (TheTriple.getOS() == llvm::Triple::Solaris) + if (TheTriple.getOS() == llvm::Triple::Solaris || + TheTriple.getOS() == llvm::Triple::OpenBSD) UseIntegratedAssembler = true; } From bb917c2f8a0eb5cf78091432bca1ffc32c241ffe Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Thu, 10 Jul 2014 22:40:18 +0000 Subject: [PATCH 0762/1155] SelectionDAG: Factor FP_TO_SINT lower code out of DAGLegalizer Move the code to a helper function to allow 
calls from TypeLegalizer. No functionality change intended Signed-off-by: Jan Vesely Reviewed-by: Tom Stellard Reviewed-by: Owen Anderson git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212772 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 6 ++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 61 +------------------- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 62 +++++++++++++++++++++ 3 files changed, 71 insertions(+), 58 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 5e9978d12e8f..180f0f2ef8e8 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -2564,6 +2564,12 @@ class TargetLowering : public TargetLoweringBase { SDValue LH = SDValue(), SDValue RL = SDValue(), SDValue RH = SDValue()) const; + /// Expand float(f32) to SINT(i64) conversion + /// \param N Node to expand + /// \param Result output after conversion + /// \returns True, if the expansion was successful, false otherwise + bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + //===--------------------------------------------------------------------===// // Instruction Emitting Hooks // diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index c0e8c8c2b05b..0d6a06605ede 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3153,65 +3153,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; - case ISD::FP_TO_SINT: { - EVT VT = Node->getOperand(0).getValueType(); - EVT NVT = Node->getValueType(0); - - // FIXME: Only f32 to i64 conversions are supported. 
- if (VT != MVT::f32 || NVT != MVT::i64) - break; - - // Expand f32 -> i64 conversion - // This algorithm comes from compiler-rt's implementation of fixsfdi: - // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c - EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits()); - SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); - SDValue ExponentLoBit = DAG.getConstant(23, IntVT); - SDValue Bias = DAG.getConstant(127, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), - IntVT); - SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); - SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); - - SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); - - SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), - DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT))); - SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); - - SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), - DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT))); - Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); - - SDValue R = DAG.getNode(ISD::OR, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), - DAG.getConstant(0x00800000, IntVT)); - - R = DAG.getZExtOrTrunc(R, dl, NVT); - - - R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, - DAG.getNode(ISD::SHL, dl, NVT, R, - DAG.getZExtOrTrunc( - DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), - dl, TLI.getShiftAmountTy(IntVT))), - DAG.getNode(ISD::SRL, dl, NVT, R, - DAG.getZExtOrTrunc( - DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), - dl, TLI.getShiftAmountTy(IntVT))), - ISD::SETGT); - - SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, - DAG.getNode(ISD::XOR, dl, NVT, R, Sign), - Sign); - - Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), - 
DAG.getConstant(0, NVT), Ret, ISD::SETLT)); + case ISD::FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) + Results.push_back(Tmp1); break; - } case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 42372a21ed7a..05ace413bfdf 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2885,3 +2885,65 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, } return false; } + +bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + SDLoc dl(SDValue(Node, 0)); + + // FIXME: Only f32 to i64 conversions are supported. + if (VT != MVT::f32 || NVT != MVT::i64) + return false; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, IntVT); + SDValue Bias = DAG.getConstant(127, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + + SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT))); + SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); + + SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, 
Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT))); + Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + + SDValue R = DAG.getNode(ISD::OR, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, IntVT)); + + R = DAG.getZExtOrTrunc(R, dl, NVT); + + + R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, getShiftAmountTy(IntVT))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, getShiftAmountTy(IntVT))), + ISD::SETGT); + + SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(ISD::XOR, dl, NVT, R, Sign), + Sign); + + Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), + DAG.getConstant(0, NVT), Ret, ISD::SETLT); + return true; +} From 865527a09b40888d5176a6800ecb602f1ff6d353 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Thu, 10 Jul 2014 22:40:21 +0000 Subject: [PATCH 0763/1155] R600: Implement float to long/ulong Use alg. 
from LegalizeDAG.cpp Move Expand setting to SIISellowering v2: Extend existing tests instead of creating new ones v3: use separate LowerFPTOSINT function v4: use TargetLowering::expandFP_TO_SINT add comment about using FP_TO_SINT for uints Signed-off-by: Jan Vesely Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212773 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 1 - lib/Target/R600/R600ISelLowering.cpp | 16 +- lib/Target/R600/SIISelLowering.cpp | 3 + test/CodeGen/R600/fp_to_sint.ll | 213 ++++++++++++++++++++++--- test/CodeGen/R600/fp_to_sint_i64.ll | 12 -- test/CodeGen/R600/fp_to_uint.ll | 210 +++++++++++++++++++++--- 6 files changed, 404 insertions(+), 51 deletions(-) delete mode 100644 test/CodeGen/R600/fp_to_sint_i64.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 0ada7a3ed827..97771423567c 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -271,7 +271,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::ROTR, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i64, Expand); setOperationAction(ISD::MULHS, MVT::i64, Expand); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 7f3560a4eba8..50411f40443d 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -82,6 +82,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 
setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); @@ -839,8 +841,20 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, default: AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG); return; - case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + case ISD::FP_TO_UINT: + if (N->getValueType(0) == MVT::i1) { + Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + return; + } + // Fall-through. Since we don't care about out of bounds values + // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint + // considers some extra cases which are not necessary here. + case ISD::FP_TO_SINT: { + SDValue Result; + if (expandFP_TO_SINT(N, Result, DAG)) + Results.push_back(Result); return; + } case ISD::UDIV: { SDValue Op = SDValue(N, 0); SDLoc DL(Op); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index b13c3b8dee2e..63d3d3852178 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -165,6 +165,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index 8302b4f8233e..235045aaaaaa 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -1,31 +1,206 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK - -; R600-CHECK: @fp_to_sint_v2i32 -; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT 
{{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; SI-CHECK: @fp_to_sint_v2i32 -; SI-CHECK: V_CVT_I32_F32_e32 -; SI-CHECK: V_CVT_I32_F32_e32 +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC + +; FUNC-LABEL: @fp_to_sint_v2i32 +; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; SI: V_CVT_I32_F32_e32 +; SI: V_CVT_I32_F32_e32 define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { %result = fptosi <2 x float> %in to <2 x i32> store <2 x i32> %result, <2 x i32> addrspace(1)* %out ret void } -; R600-CHECK: @fp_to_sint_v4i32 -; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}} -; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; SI-CHECK: @fp_to_sint_v4i32 -; SI-CHECK: V_CVT_I32_F32_e32 -; SI-CHECK: V_CVT_I32_F32_e32 -; SI-CHECK: V_CVT_I32_F32_e32 -; SI-CHECK: V_CVT_I32_F32_e32 +; FUNC-LABEL: @fp_to_sint_v4i32 +; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}} +; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; SI: V_CVT_I32_F32_e32 +; SI: V_CVT_I32_F32_e32 +; SI: V_CVT_I32_F32_e32 +; SI: V_CVT_I32_F32_e32 define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %value = load <4 x float> addrspace(1) * %in %result = fptosi <4 x float> %value to <4 x i32> store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } + +; FUNC-LABEL: @fp_to_sint_i64 + +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: 
LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT + +; Check that the compiler doesn't crash with a "cannot select" error +; SI: S_ENDPGM +define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { +entry: + %0 = fptosi float %in to i64 + store i64 %0, i64 addrspace(1)* %out + ret void +} + +; FUNC: @fp_to_sint_v2i64 +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT + +; SI: S_ENDPGM +define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { + %conv = fptosi <2 x float> %x to <2 x i64> + store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC: @fp_to_sint_v4i64 +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: 
AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT + +; SI: S_ENDPGM +define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { + %conv = fptosi <4 x float> %x to <4 x i64> + store <4 x i64> %conv, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fp_to_sint_i64.ll b/test/CodeGen/R600/fp_to_sint_i64.ll deleted file mode 100644 index ec3e19804c57..000000000000 --- a/test/CodeGen/R600/fp_to_sint_i64.ll +++ /dev/null @@ -1,12 +0,0 @@ -; FIXME: Merge into fp_to_sint.ll when EG/NI supports 64-bit types -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s - -; SI-LABEL: @fp_to_sint_i64 -; Check that the compiler doesn't crash with a "cannot select" error -; SI: S_ENDPGM -define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { -entry: - %0 = fptosi float %in to i64 - store i64 %0, i64 addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/fp_to_uint.ll 
b/test/CodeGen/R600/fp_to_uint.ll index 77db43b39c5f..a13018bdfecf 100644 --- a/test/CodeGen/R600/fp_to_uint.ll +++ b/test/CodeGen/R600/fp_to_uint.ll @@ -1,12 +1,11 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; R600-CHECK: @fp_to_uint_v2i32 -; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; SI-CHECK: @fp_to_uint_v2i32 -; SI-CHECK: V_CVT_U32_F32_e32 -; SI-CHECK: V_CVT_U32_F32_e32 +; FUNC-LABEL: @fp_to_uint_v2i32 +; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; SI: V_CVT_U32_F32_e32 +; SI: V_CVT_U32_F32_e32 define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { %result = fptoui <2 x float> %in to <2 x i32> @@ -14,16 +13,15 @@ define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { ret void } -; R600-CHECK: @fp_to_uint_v4i32 -; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; SI-CHECK: @fp_to_uint_v4i32 -; SI-CHECK: V_CVT_U32_F32_e32 -; SI-CHECK: V_CVT_U32_F32_e32 -; SI-CHECK: V_CVT_U32_F32_e32 -; SI-CHECK: V_CVT_U32_F32_e32 +; FUNC-LABEL: @fp_to_uint_v4i32 +; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], 
PV\.[XYZW]}} +; SI: V_CVT_U32_F32_e32 +; SI: V_CVT_U32_F32_e32 +; SI: V_CVT_U32_F32_e32 +; SI: V_CVT_U32_F32_e32 define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %value = load <4 x float> addrspace(1) * %in @@ -31,3 +29,179 @@ define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s + +; FUNC: @fp_to_uint_i64 +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT + +; SI: S_ENDPGM +define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) { + %conv = fptoui float %x to i64 + store i64 %conv, i64 addrspace(1)* %out + ret void +} + +; FUNC: @fp_to_uint_v2i64 +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; 
EG-DAG: CNDGE_INT + +; SI: S_ENDPGM +define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { + %conv = fptoui <2 x float> %x to <2 x i64> + store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC: @fp_to_uint_v4i64 +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT +; EG-DAG: AND_INT +; EG-DAG: LSHR +; EG-DAG: SUB_INT +; EG-DAG: AND_INT +; EG-DAG: ASHR +; EG-DAG: AND_INT +; EG-DAG: OR_INT +; EG-DAG: SUB_INT +; EG-DAG: LSHL +; EG-DAG: LSHL +; EG-DAG: SUB_INT +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT +; EG-DAG: SETGT_INT +; EG-DAG: XOR_INT +; EG-DAG: XOR_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDGE_INT +; EG-DAG: CNDGE_INT + +; SI: S_ENDPGM +define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { + %conv = fptoui <4 x float> %x to <4 x i64> + store <4 x i64> %conv, <4 x i64> 
addrspace(1)* %out + ret void +} From 9fe4d6bcd44f20390b9a554a92da2b4ad88c7d38 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 10 Jul 2014 22:57:40 +0000 Subject: [PATCH 0764/1155] This test case doesn't actually need the inliner to reproduce the input. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212775 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/DebugInfo/cross-cu-linkonce.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/test/DebugInfo/cross-cu-linkonce.ll b/test/DebugInfo/cross-cu-linkonce.ll index 16a50122dcf8..660d5709c7a8 100644 --- a/test/DebugInfo/cross-cu-linkonce.ll +++ b/test/DebugInfo/cross-cu-linkonce.ll @@ -5,7 +5,6 @@ ; Built from source: ; $ clang++ a.cpp b.cpp -g -c -emit-llvm ; $ llvm-link a.bc b.bc -o ab.bc -; $ opt -inline ab.bc -o ab-opt.bc ; $ cat a.cpp ; # 1 "func.h" ; inline int func(int i) { From e14048c25af23d89db4661e9c694b70f9af194b7 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 10 Jul 2014 22:59:39 +0000 Subject: [PATCH 0765/1155] Reapply "DebugInfo: Ensure that all debug location scope chains from instructions within a function, lead to the function itself." Committed in r212205 and reverted in r212226 due to msan self-hosting failure, I believe I've got that fixed by r212761 to Clang. Original commit message: "Originally committed in r211723, reverted in r211724 due to failure cases found and fixed (ArgumentPromotion: r211872, Inlining: r212065), committed again in r212085 and reverted again in r212089 after fixing some other cases, such as debug info subprogram lists not keeping track of the function they represent (r212128) and then short-circuiting things like LiveDebugVariables that build LexicalScopes for functions that might not have full debug info. And again, I believe the invariant actually holds for some reasonable amount of code (but I'll keep an eye on the buildbots and see what happens... ). 
Original commit message: PR20038: DebugInfo: Inlined call sites where the caller has debug info but the call itself has no debug location. This situation does bad things when inlined, so I've fixed Clang not to produce inlinable call sites without locations when the caller has debug info (in the one case where I could find that this occurred). This updates the PR20038 test case to be what clang now produces, and readds the assertion that had to be removed due to this bug. I've also beefed up the debug info verifier to help diagnose these issues in the future, and I hope to add checks to the inliner to just assert-fail if it encounters this situation. If, in the future, we decide we have to cope with this situation, the right thing to do is probably to just remove all the DebugLocs from the inlined instructions." git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212776 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 3 +-- lib/CodeGen/LexicalScopes.cpp | 9 +++++++-- lib/IR/DebugInfo.cpp | 26 ++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 77860c0bd105..1749217d7fdf 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -788,8 +788,7 @@ void DwarfDebug::finishVariableDefinitions() { for (const auto &Var : ConcreteVariables) { DIE *VariableDie = Var->getDIE(); // FIXME: There shouldn't be any variables without DIEs. - if (!VariableDie) - continue; + assert(VariableDie); // FIXME: Consider the time-space tradeoff of just storing the unit pointer // in the ConcreteVariables list, rather than looking it up again here. // DIE::getUnit isn't simple - it walks parent pointers, etc. 
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index d12c234bf3b2..f67c717d4e41 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -137,6 +137,8 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { + if (DL.isUnknown()) + return nullptr; MDNode *Scope = nullptr; MDNode *InlinedAt = nullptr; DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); @@ -172,9 +174,12 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { std::make_tuple(Parent, DIDescriptor(Scope), nullptr, false)).first; - if (!Parent && DIDescriptor(Scope).isSubprogram() && - DISubprogram(Scope).describes(MF->getFunction())) + if (!Parent) { + assert(DIDescriptor(Scope).isSubprogram()); + assert(DISubprogram(Scope).describes(MF->getFunction())); + assert(!CurrentFnLexicalScope); CurrentFnLexicalScope = &I->second; + } return &I->second; } diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 5e39b242dbbc..391dec217bd0 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -538,6 +538,32 @@ bool DISubprogram::Verify() const { if (isLValueReference() && isRValueReference()) return false; + if (auto *F = getFunction()) { + LLVMContext &Ctxt = F->getContext(); + for (auto &BB : *F) { + for (auto &I : BB) { + DebugLoc DL = I.getDebugLoc(); + if (DL.isUnknown()) + continue; + + MDNode *Scope = nullptr; + MDNode *IA = nullptr; + // walk the inlined-at scopes + while (DL.getScopeAndInlinedAt(Scope, IA, F->getContext()), IA) + DL = DebugLoc::getFromDILocation(IA); + DL.getScopeAndInlinedAt(Scope, IA, Ctxt); + assert(!IA); + while (!DIDescriptor(Scope).isSubprogram()) { + DILexicalBlockFile D(Scope); + Scope = D.isLexicalBlockFile() + ? 
D.getScope() + : DebugLoc::getFromDILexicalBlock(Scope).getScope(Ctxt); + } + if (!DISubprogram(Scope).describes(F)) + return false; + } + } + } return DbgNode->getNumOperands() == 20; } From 5cfcad187b8978275e89d5ebdf996a64de471fbc Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 10 Jul 2014 23:26:20 +0000 Subject: [PATCH 0766/1155] [RuntimeDyld] Improve error diagnostic in RuntimeDyldChecker. The compiler often emits assembler-local labels (beginning with 'L') for use in relocation expressions, however these aren't included in the object files. Teach RuntimeDyldChecker to warn the user if they try to use one of these in an expression, since it will never work. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212777 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/ExecutionEngine/RuntimeDyldChecker.h | 2 +- .../RuntimeDyld/RuntimeDyldChecker.cpp | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/llvm/ExecutionEngine/RuntimeDyldChecker.h b/include/llvm/ExecutionEngine/RuntimeDyldChecker.h index 38a4ea1ad522..8dd891e83648 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyldChecker.h +++ b/include/llvm/ExecutionEngine/RuntimeDyldChecker.h @@ -81,7 +81,7 @@ class RuntimeDyldChecker { private: - bool checkSymbolIsValidForLoad(StringRef Symbol) const; + bool isSymbolValid(StringRef Symbol) const; uint64_t getSymbolAddress(StringRef Symbol) const; uint64_t readMemoryAtSymbol(StringRef Symbol, int64_t Offset, unsigned Size) const; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 190bbbf93ff8..b10ec360aa51 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -199,7 +199,7 @@ namespace llvm { StringRef Symbol; std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr); - if (!Checker.checkSymbolIsValidForLoad(Symbol)) + if (!Checker.isSymbolValid(Symbol)) 
return std::make_pair(EvalResult(("Cannot decode unknown symbol '" + Symbol + "'").str()), ""); @@ -268,7 +268,7 @@ namespace llvm { StringRef Symbol; std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr); - if (!Checker.checkSymbolIsValidForLoad(Symbol)) + if (!Checker.isSymbolValid(Symbol)) return std::make_pair(EvalResult(("Cannot decode unknown symbol '" + Symbol + "'").str()), ""); @@ -304,6 +304,17 @@ namespace llvm { else if (Symbol == "next_pc") return evalNextPC(RemainingExpr); + if (!Checker.isSymbolValid(Symbol)) { + std::string ErrMsg("No known address for symbol '"); + ErrMsg += Symbol; + ErrMsg += "'"; + if (Symbol.startswith("L")) + ErrMsg += " (this appears to be an assembler local label - " + " perhaps drop the 'L'?)"; + + return std::make_pair(EvalResult(ErrMsg), ""); + } + // Looks like a plain symbol reference. return std::make_pair(EvalResult(Checker.getSymbolAddress(Symbol)), RemainingExpr); @@ -397,7 +408,7 @@ namespace llvm { StringRef Symbol; std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr); - if (!Checker.checkSymbolIsValidForLoad(Symbol)) + if (!Checker.isSymbolValid(Symbol)) return std::make_pair(EvalResult(("Cannot dereference unknown symbol '" + Symbol + "'").str()), ""); @@ -612,7 +623,7 @@ bool RuntimeDyldChecker::checkAllRulesInBuffer(StringRef RulePrefix, return DidAllTestsPass && (NumRules != 0); } -bool RuntimeDyldChecker::checkSymbolIsValidForLoad(StringRef Symbol) const { +bool RuntimeDyldChecker::isSymbolValid(StringRef Symbol) const { return RTDyld.getSymbolAddress(Symbol) != nullptr; } From eb2fff64dd5055ebcf7bf899f2abb5fe6b148c19 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 10 Jul 2014 23:29:11 +0000 Subject: [PATCH 0767/1155] [RuntimeDyld] Replace a crufty old ARM RuntimeDyld test with a new one that uses RuntimeDyldChecker. This allows us to remove one of the six remaining object files in the LLVM source tree. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212780 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../ARM/MachO_ARM_PIC_relocations.s | 27 ++++++++++++++++++ .../RuntimeDyld/ARM/lit.local.cfg | 3 ++ .../RuntimeDyld/Inputs/arm_secdiff_reloc.o | Bin 616 -> 0 bytes .../RuntimeDyld/macho_relocations.test | 1 - 4 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s create mode 100644 test/ExecutionEngine/RuntimeDyld/ARM/lit.local.cfg delete mode 100644 test/ExecutionEngine/RuntimeDyld/Inputs/arm_secdiff_reloc.o delete mode 100644 test/ExecutionEngine/RuntimeDyld/macho_relocations.test diff --git a/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s new file mode 100644 index 000000000000..86041835591f --- /dev/null +++ b/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s @@ -0,0 +1,27 @@ +# RUN: llvm-mc -triple=armv7s-apple-ios7.0.0 -relocation-model=pic -filetype=obj -o %t.o %s +# RUN: llvm-rtdyld -triple=armv7s-apple-ios7.0.0 -verify -check=%s %t.o +# RUN: rm %t.o + + .syntax unified + .section __TEXT,__text,regular,pure_instructions + .globl bar + .align 2 +bar: +# Check lower 16-bits of section difference relocation +# rtdyld-check: decode_operand(insn1, 1) = (foo-(nextPC+8))[15:0] +insn1: + movw r0, :lower16:(foo-(nextPC+8)) +# Check upper 16-bits of section difference relocation +# rtdyld-check: decode_operand(insn2, 2) = (foo-(nextPC+8))[31:16] +insn2: + movt r0, :upper16:(foo-(nextPC+8)) +nextPC: + add r0, pc, r0 + bx lr + + .globl foo + .align 2 +foo: + bx lr + +.subsections_via_symbols diff --git a/test/ExecutionEngine/RuntimeDyld/ARM/lit.local.cfg b/test/ExecutionEngine/RuntimeDyld/ARM/lit.local.cfg new file mode 100644 index 000000000000..98c6700c209d --- /dev/null +++ b/test/ExecutionEngine/RuntimeDyld/ARM/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'ARM' in config.root.targets: + 
config.unsupported = True + diff --git a/test/ExecutionEngine/RuntimeDyld/Inputs/arm_secdiff_reloc.o b/test/ExecutionEngine/RuntimeDyld/Inputs/arm_secdiff_reloc.o deleted file mode 100644 index 5392266cf560cb739b8ef0d2d15932f92bf73f2f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 616 zcmX^2>+L@t1_lOhAZ7$&W+46oVkkiPJs>s~AOaK$Uy@o;0_6ay z`1lalh!6xH#DeJu8o{6fBv?R#EDQ}G3Irf#B^@0b<7h30S-Z0QtxOBoD%5g2xOD4nW-hK<>Z(Ll6eW%}kI# lfM6ojU!72XBZv=5BJ$950!jlQHdsDB&5$8J%?K#O003e`IAj0- diff --git a/test/ExecutionEngine/RuntimeDyld/macho_relocations.test b/test/ExecutionEngine/RuntimeDyld/macho_relocations.test deleted file mode 100644 index 92e4dd793ccb..000000000000 --- a/test/ExecutionEngine/RuntimeDyld/macho_relocations.test +++ /dev/null @@ -1 +0,0 @@ -RUN: llvm-rtdyld -printline %p/Inputs/arm_secdiff_reloc.o From 9853f945205a0e1193afc3e84c10be96b4932d1b Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Thu, 10 Jul 2014 23:30:06 +0000 Subject: [PATCH 0768/1155] Partially fix PR20058: reduce compile time for loop unrolling with very high count by reducing calls to SE->forgetLoop git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212782 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/ScalarEvolution.h | 3 ++- lib/Transforms/Utils/LoopUnroll.cpp | 24 +++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 057082676824..617e54541ee1 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -795,7 +795,8 @@ namespace llvm { /// forgetLoop - This method should be called by the client when it has /// changed a loop in a way that may effect ScalarEvolution's ability to - /// compute a trip count, or if the loop is deleted. + /// compute a trip count, or if the loop is deleted. This call is + /// potentially expensive for large loop bodies. 
void forgetLoop(const Loop *L); /// forgetValue - This method should be called by the client when it has diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index c86b82cf4fba..ab1c25a75e26 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" @@ -64,10 +65,15 @@ static inline void RemapInstruction(Instruction *I, /// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it /// only has one predecessor, and that predecessor only has one successor. -/// The LoopInfo Analysis that is passed will be kept consistent. -/// Returns the new combined block. -static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, - LPPassManager *LPM) { +/// The LoopInfo Analysis that is passed will be kept consistent. If folding is +/// successful references to the containing loop must be removed from +/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have +/// references to the eliminated BB. The argument ForgottenLoops contains a set +/// of loops that have already been forgotten to prevent redundant, expensive +/// calls to ScalarEvolution::forgetLoop. Returns the new combined block. +static BasicBlock * +FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, + SmallPtrSetImpl &ForgottenLoops) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. @@ -104,8 +110,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // ScalarEvolution holds references to loop exit blocks. 
if (LPM) { if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable()) { - if (Loop *L = LI->getLoopFor(BB)) - SE->forgetLoop(L); + if (Loop *L = LI->getLoopFor(BB)) { + if (ForgottenLoops.insert(L)) + SE->forgetLoop(L); + } } } LI->removeBlock(BB); @@ -423,11 +431,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, } // Merge adjacent basic blocks, if possible. + SmallPtrSet ForgottenLoops; for (unsigned i = 0, e = Latches.size(); i != e; ++i) { BranchInst *Term = cast(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM, + ForgottenLoops)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } From 181826c6233c268e1fab14ca3f6cf6ca99059d93 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 11 Jul 2014 02:42:57 +0000 Subject: [PATCH 0769/1155] Revert "Reapply "DebugInfo: Ensure that all debug location scope chains from instructions within a function, lead to the function itself."" This reverts commit r212776. Nope, still seems to be failing on the sanitizer bots... but hey, not the msan self-host anymore, it's failing in asan now. I'll start looking there next. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212793 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 3 ++- lib/CodeGen/LexicalScopes.cpp | 9 ++------- lib/IR/DebugInfo.cpp | 26 -------------------------- 3 files changed, 4 insertions(+), 34 deletions(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1749217d7fdf..77860c0bd105 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -788,7 +788,8 @@ void DwarfDebug::finishVariableDefinitions() { for (const auto &Var : ConcreteVariables) { DIE *VariableDie = Var->getDIE(); // FIXME: There shouldn't be any variables without DIEs. - assert(VariableDie); + if (!VariableDie) + continue; // FIXME: Consider the time-space tradeoff of just storing the unit pointer // in the ConcreteVariables list, rather than looking it up again here. // DIE::getUnit isn't simple - it walks parent pointers, etc. diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index f67c717d4e41..d12c234bf3b2 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -137,8 +137,6 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. 
LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { - if (DL.isUnknown()) - return nullptr; MDNode *Scope = nullptr; MDNode *InlinedAt = nullptr; DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); @@ -174,12 +172,9 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { std::make_tuple(Parent, DIDescriptor(Scope), nullptr, false)).first; - if (!Parent) { - assert(DIDescriptor(Scope).isSubprogram()); - assert(DISubprogram(Scope).describes(MF->getFunction())); - assert(!CurrentFnLexicalScope); + if (!Parent && DIDescriptor(Scope).isSubprogram() && + DISubprogram(Scope).describes(MF->getFunction())) CurrentFnLexicalScope = &I->second; - } return &I->second; } diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 391dec217bd0..5e39b242dbbc 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -538,32 +538,6 @@ bool DISubprogram::Verify() const { if (isLValueReference() && isRValueReference()) return false; - if (auto *F = getFunction()) { - LLVMContext &Ctxt = F->getContext(); - for (auto &BB : *F) { - for (auto &I : BB) { - DebugLoc DL = I.getDebugLoc(); - if (DL.isUnknown()) - continue; - - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - // walk the inlined-at scopes - while (DL.getScopeAndInlinedAt(Scope, IA, F->getContext()), IA) - DL = DebugLoc::getFromDILocation(IA); - DL.getScopeAndInlinedAt(Scope, IA, Ctxt); - assert(!IA); - while (!DIDescriptor(Scope).isSubprogram()) { - DILexicalBlockFile D(Scope); - Scope = D.isLexicalBlockFile() - ? 
D.getScope() - : DebugLoc::getFromDILexicalBlock(Scope).getScope(Ctxt); - } - if (!DISubprogram(Scope).describes(F)) - return false; - } - } - } return DbgNode->getNumOperands() == 20; } From fcc56eb31fb98c1a6b91a545f9cf7652d9291bd8 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Fri, 11 Jul 2014 05:23:12 +0000 Subject: [PATCH 0770/1155] [X86] AVX512: Simplify logic in isCDisp8 It was computing the VL/n case as: MemObjSize = VectorByteSize / ElemByteSize / Divider * ElemByteSize ElemByteSize not only falls out but VectorByteSize/Divider now actually matches the definition of VL/n. Also some formatting fixes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212794 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 2152b21068f3..dbd357cb0083 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -185,7 +185,8 @@ static bool isDisp8(int Value) { /// isCDisp8 - Return true if this signed displacement fits in a 8-bit /// compressed dispacement field. static bool isCDisp8(uint64_t TSFlags, int Value, int& CValue) { - assert((TSFlags & X86II::EncodingMask) >> X86II::EncodingShift == X86II::EVEX && + assert(((TSFlags & X86II::EncodingMask) >> + X86II::EncodingShift == X86II::EVEX) && "Compressed 8-bit displacement is only valid for EVEX inst."); unsigned CD8E = (TSFlags >> X86II::EVEX_CD8EShift) & X86II::EVEX_CD8EMask; @@ -195,7 +196,7 @@ static bool isCDisp8(uint64_t TSFlags, int Value, int& CValue) { CValue = Value; return isDisp8(Value); } - + unsigned MemObjSize = 1U << CD8E; if (CD8V & 4) { // Fixed vector length @@ -208,10 +209,9 @@ static bool isCDisp8(uint64_t TSFlags, int Value, int& CValue) { EVEX_LL += ((TSFlags >> X86II::VEXShift) & X86II::EVEX_L2) ? 
2 : 0; assert(EVEX_LL < 3 && ""); - unsigned NumElems = (1U << (EVEX_LL + 4)) / MemObjSize; - NumElems /= 1U << (CD8V & 0x3); - - MemObjSize *= NumElems; + unsigned VectorByteSize = 1U << (EVEX_LL + 4); + unsigned Divider = 1U << (CD8V & 0x3); + MemObjSize = VectorByteSize / Divider; } } From 11c9a50611fb83bdc71d6f25f3cbb372317a2cdc Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Fri, 11 Jul 2014 05:23:25 +0000 Subject: [PATCH 0771/1155] [X86] AVX512: Improve readability of isCDisp8 No functional change. As I was trying to understand this function, I found that variables were reused with confusing names and the broadcast case was a bit too implicit. Hopefully, this is an improvement. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212795 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index dbd357cb0083..7c30fc25ae0c 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -197,14 +197,23 @@ static bool isCDisp8(uint64_t TSFlags, int Value, int& CValue) { return isDisp8(Value); } - unsigned MemObjSize = 1U << CD8E; + unsigned ElemSize = 1U << CD8E; + unsigned MemObjSize; + // The unit of displacement is either + // - the size of a power-of-two number of elements or + // - the size of a single element for broadcasts or + // - the total vector size divided by a power-of-two number. if (CD8V & 4) { // Fixed vector length - MemObjSize *= 1U << (CD8V & 0x3); + unsigned NumElems = 1U << (CD8V & 0x3); + MemObjSize = ElemSize * NumElems; } else { // Modified vector length bool EVEX_b = (TSFlags >> X86II::VEXShift) & X86II::EVEX_B; - if (!EVEX_b) { + if (EVEX_b) + // Broadcast implies element size units. 
+ MemObjSize = ElemSize; + else { unsigned EVEX_LL = ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) ? 1 : 0; EVEX_LL += ((TSFlags >> X86II::VEXShift) & X86II::EVEX_L2) ? 2 : 0; assert(EVEX_LL < 3 && ""); From a533de6f598394c1f1fb7faf689374e2d7560c81 Mon Sep 17 00:00:00 2001 From: Marcello Maggioni Date: Fri, 11 Jul 2014 10:34:36 +0000 Subject: [PATCH 0772/1155] Fixup PHIs in LowerSwitch when a Leaf node is not emitted. This commit fixes bug http://llvm.org/bugs/show_bug.cgi?id=20103. Thanks to Qwertyuiop for the report and the proposed fix. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212802 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/LowerSwitch.cpp | 41 +++++++++++++++++++++------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index eac693bdf8a4..d6e5bb626805 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -67,8 +67,8 @@ namespace { BasicBlock *switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ConstantInt *UpperBound, - Value *Val, BasicBlock *OrigBlock, - BasicBlock *Default); + Value *Val, BasicBlock *Predecessor, + BasicBlock *OrigBlock, BasicBlock *Default); BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock, BasicBlock *Default); unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); @@ -131,6 +131,21 @@ static raw_ostream& operator<<(raw_ostream &O, return O << "]"; } +static void fixPhis(BasicBlock *Succ, + BasicBlock *OrigBlock, + BasicBlock *NewNode) { + for (BasicBlock::iterator I = Succ->begin(), + E = Succ->getFirstNonPHI(); + I != E; ++I) { + PHINode *PN = cast(I); + + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + if (PN->getIncomingBlock(I) == OrigBlock) + PN->setIncomingBlock(I, NewNode); + } + } +} + // switchConvert - Convert the switch statement into a binary lookup of // the case values. 
The function recursively builds this tree. // LowerBound and UpperBound are used to keep track of the bounds for Val @@ -139,6 +154,7 @@ static raw_ostream& operator<<(raw_ostream &O, BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, BasicBlock *OrigBlock, BasicBlock *Default) { unsigned Size = End - Begin; @@ -149,6 +165,7 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, // emitting the code that checks if the value actually falls in the range // because the bounds already tell us so. if (Begin->Low == LowerBound && Begin->High == UpperBound) { + fixPhis(Begin->BB, OrigBlock, Predecessor); return Begin->BB; } return newLeafBlock(*Begin, Val, OrigBlock, Default); @@ -200,21 +217,25 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, dbgs() << "NONE\n"; }); - BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound, - NewUpperBound, Val, OrigBlock, Default); - BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound, - UpperBound, Val, OrigBlock, Default); - // Create a new node that checks if the value is < pivot. Go to the // left branch if it is and right branch if not. 
Function* F = OrigBlock->getParent(); BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); - Function::iterator FI = OrigBlock; - F->getBasicBlockList().insert(++FI, NewNode); ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); + + BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound, + NewUpperBound, Val, NewNode, OrigBlock, + Default); + BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound, + UpperBound, Val, NewNode, OrigBlock, + Default); + + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewNode); NewNode->getInstList().push_back(Comp); + BranchInst::Create(LBranch, RBranch, Comp, NewNode); return NewNode; } @@ -386,7 +407,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { } BasicBlock *SwitchBlock = switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, - OrigBlock, NewDefault); + OrigBlock, OrigBlock, NewDefault); // Branch to our shiny new if-then stuff... BranchInst::Create(SwitchBlock, OrigBlock); From ca4b18075c94a74634f40cc447720e65dc4b4024 Mon Sep 17 00:00:00 2001 From: Marcello Maggioni Date: Fri, 11 Jul 2014 10:36:00 +0000 Subject: [PATCH 0773/1155] Added test for commit r212802 that was missing git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212803 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../LowerSwitch/2014-06-23-PHIlowering.ll | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll diff --git a/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll b/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll new file mode 100644 index 000000000000..c6cddf636a88 --- /dev/null +++ b/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -lowerswitch -S | FileCheck %s + +define i32 @test(i32 %arg) #0 { +; CHECK-LABEL: @test +; CHECK: ;