Update SwiftShader to April code dump. April code dump from Transgaming. Adds new shader compiler.
diff --git a/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj b/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj new file mode 100644 index 0000000..e64a03c --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj
@@ -0,0 +1,261 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>INSTALL</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj.filters b/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj.filters new file mode 100644 index 0000000..251dd1d --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombine.h b/src/LLVM/lib/Transforms/InstCombine/InstCombine.h index e740248..3808278 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombine.h +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombine.h
@@ -11,6 +11,8 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/IRBuilder.h" @@ -20,6 +22,7 @@ namespace llvm { class CallSite; class TargetData; + class DbgDeclareInst; class MemIntrinsic; class MemSetInst; @@ -51,14 +54,14 @@ /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter - : public IRBuilderDefaultInserter { + : public IRBuilderDefaultInserter<true> { InstCombineWorklist &Worklist; public: InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} - void InsertHelper(Instruction *I, + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, BasicBlock::iterator InsertPt) const { - IRBuilderDefaultInserter::InsertHelper(I, BB, InsertPt); + IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); Worklist.Add(I); } }; @@ -68,7 +71,6 @@ : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; - bool MustPreserveLCSSA; bool MadeIRChange; public: /// Worklist - All of the instructions that need to be simplified. @@ -76,11 +78,13 @@ /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. - typedef IRBuilder<TargetFolder, InstCombineIRInserter> BuilderTy; + typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy; BuilderTy *Builder; static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {} + InstCombiner() : FunctionPass(ID), TD(0), Builder(0) { + initializeInstCombinerPass(*PassRegistry::getPassRegistry()); + } public: virtual bool runOnFunction(Function &F); @@ -100,7 +104,7 @@ // Instruction *visitAdd(BinaryOperator &I); Instruction *visitFAdd(BinaryOperator &I); - Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty); + Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty); Instruction *visitSub(BinaryOperator &I); Instruction *visitFSub(BinaryOperator &I); Instruction *visitMul(BinaryOperator &I); @@ -142,6 +146,8 @@ ConstantInt *RHS); Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); + Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, ICmpInst::Predicate Pred, Value *TheAdd); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, @@ -171,11 +177,14 @@ Instruction *visitSelectInst(SelectInst &SI); Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); Instruction *visitCallInst(CallInst &CI); + Instruction *visitInvokeInst(InvokeInst &II); Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); + Instruction *visitMalloc(Instruction &FI); + Instruction *visitFree(CallInst &FI); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Instruction *visitBranchInst(BranchInst &BI); @@ -184,15 +193,16 @@ Instruction *visitExtractElementInst(ExtractElementInst &EI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); Instruction *visitExtractValueInst(ExtractValueInst &EV); + Instruction *visitLandingPadInst(LandingPadInst &LI); // visitInstruction - Specify what to return for unhandled instructions... Instruction *visitInstruction(Instruction &I) { return 0; } private: - bool ShouldChangeType(const Type *From, const Type *To) const; + bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V) const; - const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, + Type *FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); @@ -201,15 +211,17 @@ /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty); + Type *Ty); Instruction *visitCallSite(CallSite CS); + Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD); bool transformConstExprCastCall(CallSite CS); - Instruction *transformCallThroughTrampoline(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); + Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); - Value *EmitGEPOffset(User *GEP); public: @@ -224,7 +236,15 @@ Worklist.Add(New); return New; } - + + // InsertNewInstWith - same as InsertNewInstBefore, but also sets the + // debug loc. + // + Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) { + New->setDebugLoc(Old.getDebugLoc()); + return InsertNewInstBefore(New, Old); + } + // ReplaceInstUsesWith - This method is to be used when an instruction is // found to be dead, replacable with another preexisting expression. Here // we add all uses of I to the worklist, replace all uses of I with the new @@ -238,7 +258,10 @@ // segment of unreachable code, so just clobber the instruction. if (&I == V) V = UndefValue::get(I.getType()); - + + DEBUG(errs() << "IC: Replacing " << I << "\n" + " with " << *V << '\n'); + I.replaceAllUsesWith(V); return &I; } @@ -279,9 +302,16 @@ private: - /// SimplifyCommutative - This performs a few simplifications for - /// commutative operators. - bool SimplifyCommutative(BinaryOperator &I); + /// SimplifyAssociativeOrCommutative - This performs a few simplifications for + /// operators which are associative or commutative. + bool SimplifyAssociativeOrCommutative(BinaryOperator &I); + + /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations + /// which some other binary operation distributes over either by factorizing + /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this + /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is + /// a win). Returns the simplified value, or null if it didn't simplify. + Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. @@ -305,10 +335,7 @@ // into the PHI (which is only possible if all operands to the PHI are // constants). // - // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms - // that would normally be unprofitable because they strongly encourage jump - // threading. - Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); + Instruction *FoldOpIntoPhi(Instruction &I); // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" // operator and they all are only used by the PHI, PHI together their @@ -333,11 +360,7 @@ Instruction *SimplifyMemSet(MemSetInst *MI); - Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); - - unsigned GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign = 0); - + Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); };
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 262a855..d10046c 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -84,43 +84,37 @@ } Instruction *InstCombiner::visitAdd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), TD)) return ReplaceInstUsesWith(I, V); - - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) { - // X + (signbit) --> X ^ signbit - const APInt& Val = CI->getValue(); - uint32_t BitWidth = Val.getBitWidth(); - if (Val == APInt::getSignBit(BitWidth)) - return BinaryOperator::CreateXor(LHS, RHS); - - // See if SimplifyDemandedBits can simplify this. This handles stuff like - // (X & 254)+1 -> (X&254)|1 - if (SimplifyDemandedInstructionBits(I)) - return &I; + // (A*B)+(A*C) -> A*(B+C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); - // zext(bool) + C -> bool ? C + 1 : C - if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) - if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) - return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - } - - if (isa<PHINode>(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // X + (signbit) --> X ^ signbit + const APInt &Val = CI->getValue(); + if (Val.isSignBit()) + return BinaryOperator::CreateXor(LHS, RHS); - ConstantInt *XorRHS = 0; - Value *XorLHS = 0; - if (isa<ConstantInt>(RHSC) && - match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { + // See if SimplifyDemandedBits can simplify this. This handles stuff like + // (X & 254)+1 -> (X&254)|1 + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // zext(bool) + C -> bool ? C + 1 : C + if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) + if (ZI->getSrcTy()->isIntegerTy(1)) + return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); + + Value *XorLHS = 0; ConstantInt *XorRHS = 0; + if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); - const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); + const APInt &RHSVal = CI->getValue(); unsigned ExtendAmt = 0; // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. @@ -130,49 +124,43 @@ else if (XorRHS->getValue().isPowerOf2()) ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; } - + if (ExtendAmt) { APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); if (!MaskedValueIsZero(XorLHS, Mask)) ExtendAmt = 0; } - + if (ExtendAmt) { Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); - Value *NewShl = Builder->CreateShl(XorLHS, ShAmt); + Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); return BinaryOperator::CreateAShr(NewShl, ShAmt); } } } + if (isa<Constant>(RHS) && isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(LHS, RHS); - if (I.getType()->isIntegerTy()) { - // X + X --> X << 1 - if (LHS == RHS) - return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); - - if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) { - if (RHSI->getOpcode() == Instruction::Sub) - if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B - return ReplaceInstUsesWith(I, RHSI->getOperand(0)); - } - if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) { - if (LHSI->getOpcode() == Instruction::Sub) - if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B - return ReplaceInstUsesWith(I, LHSI->getOperand(0)); - } + // X + X --> X << 1 + if (LHS == RHS) { + BinaryOperator *New = + BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); + New->setHasNoSignedWrap(I.hasNoSignedWrap()); + New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return New; } // -A + B --> B - A // -A + -B --> -(A + B) if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVectorTy()) { - if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV); - return BinaryOperator::CreateNeg(NewAdd); - } + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + return BinaryOperator::CreateNeg(NewAdd); } return BinaryOperator::CreateSub(RHS, LHSV); @@ -199,13 +187,8 @@ if (dyn_castFoldableMul(RHS, C2) == LHS) return BinaryOperator::CreateMul(LHS, AddOne(C2)); - // X + ~X --> -1 since ~X = -X-1 - if (match(LHS, m_Not(m_Specific(RHS))) || - match(RHS, m_Not(m_Specific(LHS)))) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - // A+B --> A|B iff A and B have no bits set in common. - if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { + if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); @@ -222,7 +205,7 @@ } // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVectorTy()) { + { Value *W, *X, *Y, *Z; if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { @@ -238,7 +221,7 @@ } if (W == Y) { - Value *NewAdd = Builder->CreateAdd(X, Z); + Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); return BinaryOperator::CreateMul(W, NewAdd); } } @@ -251,24 +234,22 @@ // (X & FF00) + xx00 -> (X+xx00) & FF00 if (LHS->hasOneUse() && - match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = ConstantExpr::getAnd(CRHS, C2); - if (Anded == CRHS) { - // See if all bits from the first bit set in the Add RHS up are included - // in the mask. First, get the rightmost bit. - const APInt &AddRHSV = CRHS->getValue(); + match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) && + CRHS->getValue() == (CRHS->getValue() & C2->getValue())) { + // See if all bits from the first bit set in the Add RHS up are included + // in the mask. First, get the rightmost bit. + const APInt &AddRHSV = CRHS->getValue(); + + // Form a mask of all bits from the lowest bit added through the top. + APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); - // Form a mask of all bits from the lowest bit added through the top. - APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); + // See if the and mask includes all of these bits. + APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); - // See if the and mask includes all of these bits. - APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); - - if (AddRHSHighBits == AddRHSHighBitsAnd) { - // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = Builder->CreateAdd(X, CRHS); - return BinaryOperator::CreateAnd(NewAdd, C2); - } + if (AddRHSHighBits == AddRHSHighBitsAnd) { + // Okay, the xform is safe. Insert the new add pronto. + Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + return BinaryOperator::CreateAnd(NewAdd, C2); } } @@ -293,12 +274,11 @@ // Can we fold the add into the argument of the select? // We check both true and false select arguments for a matching subtract. - if (match(FV, m_Zero()) && - match(TV, m_Sub(m_Value(N), m_Specific(A)))) + if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the true select value. return SelectInst::Create(SI->getCondition(), N, A); - if (match(TV, m_Zero()) && - match(FV, m_Sub(m_Value(N), m_Specific(A)))) + + if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the false select value. return SelectInst::Create(SI->getCondition(), A, N); } @@ -316,7 +296,7 @@ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new, smaller add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - CI); + CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -332,7 +312,7 @@ RHSConv->getOperand(0))) { // Insert the new integer add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0)); + RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -342,7 +322,7 @@ } Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (Constant *RHSC = dyn_cast<Constant>(RHS)) { @@ -389,7 +369,7 @@ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new integer add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - CI); + CI, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -405,7 +385,7 @@ RHSConv->getOperand(0))) { // Insert the new integer add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0)); + RHSConv->getOperand(0),"addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -421,9 +401,13 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { TargetData &TD = *getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); + Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); Value *Result = Constant::getNullValue(IntPtrTy); + // If the GEP is inbounds, we know that none of the addressing operations will + // overflow in an unsigned sense. + bool isInBounds = cast<GEPOperator>(GEP)->isInBounds(); + // Build a mask for high order bits. unsigned IntPtrWidth = TD.getPointerSizeInBits(); uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); @@ -436,33 +420,34 @@ if (OpC->isZero()) continue; // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - Result = Builder->CreateAdd(Result, - ConstantInt::get(IntPtrTy, Size)); + if (Size) + Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"); continue; } Constant *Scale = ConstantInt::get(IntPtrTy, Size); Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale); + Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); // Emit an add instruction. - Result = Builder->CreateAdd(Result, Scale); + Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); continue; } // Convert to correct type. if (Op->getType() != IntPtrTy) - Op = Builder->CreateIntCast(Op, IntPtrTy, true); + Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); if (Size != 1) { - Constant *Scale = ConstantInt::get(IntPtrTy, Size); // We'll let instcombine(mul) convert this to a shl if possible. - Op = Builder->CreateMul(Op, Scale); + Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".idx", isInBounds /*NUW*/); } // Emit an add instruction. - Result = Builder->CreateAdd(Op, Result); + Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); } return Result; } @@ -475,7 +460,7 @@ /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. /// Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, - const Type *Ty) { + Type *Ty) { assert(TD && "Must have target data info for this"); // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize @@ -535,7 +520,7 @@ // If we have p - gep(p, ...) then we have to negate the result. if (Swapped) - Result = Builder->CreateNeg(Result); + Result = Builder->CreateNeg(Result, "diff.neg"); return Builder->CreateIntCast(Result, Ty, true); } @@ -544,8 +529,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Op0 == Op1) // sub X, X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), TD)) + return ReplaceInstUsesWith(I, V); + + // (A*B)-(A*C) -> A*(B-C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. if (Value *V = dyn_castNegVal(Op1)) { @@ -555,18 +545,14 @@ return Res; } - if (isa<UndefValue>(Op0)) - return ReplaceInstUsesWith(I, Op0); // undef - X -> undef - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); // X - undef -> undef if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(Op0, Op1); + + // Replace (-1 - A) with (~A). + if (match(Op0, m_AllOnes())) + return BinaryOperator::CreateNot(Op1); if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { - // Replace (-1 - A) with (~A). - if (C->isAllOnesValue()) - return BinaryOperator::CreateNot(Op1); - // C - ~X == X + (1+C) Value *X = 0; if (match(Op1, m_Not(m_Value(X)))) @@ -575,29 +561,16 @@ // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) if (C->isZero()) { - if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) { - if (SI->getOpcode() == Instruction::LShr) { - if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert AShr. - return BinaryOperator::Create(Instruction::AShr, - SI->getOperand(0), CU); - } - } - } else if (SI->getOpcode() == Instruction::AShr) { - if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert LShr. - return BinaryOperator::CreateLShr( - SI->getOperand(0), CU); - } - } - } - } + Value *X; ConstantInt *CI; + if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) && + // Verify we are shifting out everything but the sign bit. + CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) + return BinaryOperator::CreateAShr(X, CI); + + if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) && + // Verify we are shifting out everything but the sign bit. + CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) + return BinaryOperator::CreateLShr(X, CI); } // Try to fold constant sub into select arguments. @@ -607,83 +580,80 @@ // C - zext(bool) -> bool ? C - 1 : C if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) - if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) + if (ZI->getSrcTy()->isIntegerTy(1)) return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); + + // C-(X+C2) --> (C-C2)-X + ConstantInt *C2; + if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) + return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); } - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op1I->getOpcode() == Instruction::Add) { - if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(1)); - else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(0)); - else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) { - if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1))) - // C1-(X+C2) --> (C1-C2)-X - return BinaryOperator::CreateSub( - ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); - } - } - - if (Op1I->hasOneUse()) { - // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression - // is not used by anyone else... - // - if (Op1I->getOpcode() == Instruction::Sub) { - // Swap the two operands of the subexpr... - Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); - Op1I->setOperand(0, IIOp1); - Op1I->setOperand(1, IIOp0); - - // Create the new top level add instruction... - return BinaryOperator::CreateAdd(Op0, Op1); - } - - // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... - // - if (Op1I->getOpcode() == Instruction::And && - (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { - Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); - - Value *NewNot = Builder->CreateNot(OtherOp); - return BinaryOperator::CreateAnd(Op0, NewNot); - } - - // 0 - (X sdiv C) -> (X sdiv -C) - if (Op1I->getOpcode() == Instruction::SDiv) - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) - if (CSI->isZero()) - if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1))) - return BinaryOperator::CreateSDiv(Op1I->getOperand(0), - ConstantExpr::getNeg(DivRHS)); - - // 0 - (C << X) -> (-C << X) - if (Op1I->getOpcode() == Instruction::Shl) - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) - if (CSI->isZero()) - if (Value *ShlLHSNeg = dyn_castNegVal(Op1I->getOperand(0))) - return BinaryOperator::CreateShl(ShlLHSNeg, Op1I->getOperand(1)); - - // X - X*C --> X * (1-C) - ConstantInt *C2 = 0; - if (dyn_castFoldableMul(Op1I, C2) == Op0) { - Constant *CP1 = - ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), - C2); - return BinaryOperator::CreateMul(Op0, CP1); - } - } + + { Value *Y; + // X-(X+Y) == -Y X-(Y+X) == -Y + if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || + match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) + return BinaryOperator::CreateNeg(Y); + + // (X-Y)-X == -Y + if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) + return BinaryOperator::CreateNeg(Y); } + + if (Op1->hasOneUse()) { + Value *X = 0, *Y = 0, *Z = 0; + Constant *C = 0; + ConstantInt *CI = 0; - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (Op0I->getOpcode() == Instruction::Add) { - if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(1)); - else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(0)); - } else if (Op0I->getOpcode() == Instruction::Sub) { - if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y - return BinaryOperator::CreateNeg(Op0I->getOperand(1)); + // (X - (Y - Z)) --> (X + (Z - Y)). + if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) + return BinaryOperator::CreateAdd(Op0, + Builder->CreateSub(Z, Y, Op1->getName())); + + // (X - (X & Y)) --> (X & ~Y) + // + if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) || + match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) + return BinaryOperator::CreateAnd(Op0, + Builder->CreateNot(Y, Y->getName() + ".not")); + + // 0 - (X sdiv C) -> (X sdiv -C) + if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && + match(Op0, m_Zero())) + return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); + + // 0 - (X << Y) -> (-X << Y) when X is freely negatable. + if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero())) + if (Value *XNeg = dyn_castNegVal(X)) + return BinaryOperator::CreateShl(XNeg, Y); + + // X - X*C --> X * (1-C) + if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) { + Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI); + return BinaryOperator::CreateMul(Op0, CP1); + } + + // X - X<<C --> X * (1-(1<<C)) + if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) { + Constant *One = ConstantInt::get(I.getType(), 1); + C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); + return BinaryOperator::CreateMul(Op0, C); + } + + // X - A*-B -> X + A*B + // X - -A*B -> X + A*B + Value *A, *B; + if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || + match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) + return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); + + // X - A*CI -> X + A*-CI + // X - CI*A -> X + A*-CI + if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || + match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) { + Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); + return BinaryOperator::CreateAdd(Op0, NewMul); } }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a00337f..5e0bfe8 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Intrinsics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -172,7 +173,9 @@ case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break; case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break; case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break; - case 7: return ConstantInt::getTrue(LHS->getContext()); + case 7: + if (!isordered) return ConstantInt::getTrue(LHS->getContext()); + Pred = FCmpInst::FCMP_ORD; break; } return Builder->CreateFCmp(Pred, LHS, RHS); } @@ -207,15 +210,26 @@ } break; case Instruction::Or: - if (Together == AndRHS) // (X | C) & C --> C - return ReplaceInstUsesWith(TheAnd, AndRHS); - - if (Op->hasOneUse() && Together != OpRHS) { - // (X | C1) & C2 --> (X | (C1&C2)) & C2 - Value *Or = Builder->CreateOr(X, Together); - Or->takeName(Op); - return BinaryOperator::CreateAnd(Or, AndRHS); + if (Op->hasOneUse()){ + if (Together != OpRHS) { + // (X | C1) & C2 --> (X | (C1&C2)) & C2 + Value *Or = Builder->CreateOr(X, Together); + Or->takeName(Op); + return BinaryOperator::CreateAnd(Or, AndRHS); + } + + ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together); + if (TogetherCI && !TogetherCI->isZero()){ + // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1 + // NOTE: This reduces the number of bits set in the & mask, which + // can expose opportunities for store narrowing. + Together = ConstantExpr::getXor(AndRHS, Together); + Value *And = Builder->CreateAnd(X, Together); + And->takeName(Op); + return BinaryOperator::CreateOr(And, OpRHS); + } } + break; case Instruction::Add: if (Op->hasOneUse()) { @@ -261,10 +275,11 @@ ConstantInt *CI = ConstantInt::get(AndRHS->getContext(), AndRHS->getValue() & ShlMask); - if (CI->getValue() == ShlMask) { - // Masking out bits that the shift already masks + if (CI->getValue() == ShlMask) + // Masking out bits that the shift already masks. return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. - } else if (CI != AndRHS) { // Reducing bits set in and. + + if (CI != AndRHS) { // Reducing bits set in and. TheAnd.setOperand(1, CI); return &TheAnd; } @@ -281,10 +296,11 @@ ConstantInt *CI = ConstantInt::get(Op->getContext(), AndRHS->getValue() & ShrMask); - if (CI->getValue() == ShrMask) { - // Masking out bits that the shift already masks. + if (CI->getValue() == ShrMask) + // Masking out bits that the shift already masks. return ReplaceInstUsesWith(TheAnd, Op); - } else if (CI != AndRHS) { + + if (CI != AndRHS) { TheAnd.setOperand(1, CI); // Reduce bits set in and cst. return &TheAnd; } @@ -304,8 +320,8 @@ // (Val ashr C1) & C2 -> (Val lshr C1) & C2 // Make the argument unsigned. Value *ShVal = Op->getOperand(0); - ShVal = Builder->CreateLShr(ShVal, OpRHS); - return BinaryOperator::CreateAnd(ShVal, AndRHS); + ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); + return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); } } break; @@ -315,7 +331,7 @@ /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient +/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient /// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. @@ -338,7 +354,7 @@ // Emit V-Lo <u Hi-Lo Constant *NegLo = ConstantExpr::getNeg(Lo); - Value *Add = Builder->CreateAdd(V, NegLo); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); return Builder->CreateICmpULT(Add, UpperBound); } @@ -357,7 +373,7 @@ // Emit V-Lo >u Hi-1-Lo // Note that Hi has already had one subtracted from it, above. ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); - Value *Add = Builder->CreateAdd(V, NegLo); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); return Builder->CreateICmpUGT(Add, LowerBound); } @@ -430,8 +446,272 @@ } if (isSub) - return Builder->CreateSub(LHSI->getOperand(0), RHS); - return Builder->CreateAdd(LHSI->getOperand(0), RHS); + return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); + return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); +} + +/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C) +/// One of A and B is considered the mask, the other the value. This is +/// described as the "AMask" or "BMask" part of the enum. If the enum +/// contains only "Mask", then both A and B can be considered masks. +/// If A is the mask, then it was proven, that (A & C) == C. This +/// is trivial if C == A, or C == 0. If both A and C are constants, this +/// proof is also easy. +/// For the following explanations we assume that A is the mask. +/// The part "AllOnes" declares, that the comparison is true only +/// if (A & B) == A, or all bits of A are set in B. +/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes +/// The part "AllZeroes" declares, that the comparison is true only +/// if (A & B) == 0, or all bits of A are cleared in B. +/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes +/// The part "Mixed" declares, that (A & B) == C and C might or might not +/// contain any number of one bits and zero bits. +/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed +/// The Part "Not" means, that in above descriptions "==" should be replaced +/// by "!=". +/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes +/// If the mask A contains a single bit, then the following is equivalent: +/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0) +/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0) +enum MaskedICmpType { + FoldMskICmp_AMask_AllOnes = 1, + FoldMskICmp_AMask_NotAllOnes = 2, + FoldMskICmp_BMask_AllOnes = 4, + FoldMskICmp_BMask_NotAllOnes = 8, + FoldMskICmp_Mask_AllZeroes = 16, + FoldMskICmp_Mask_NotAllZeroes = 32, + FoldMskICmp_AMask_Mixed = 64, + FoldMskICmp_AMask_NotMixed = 128, + FoldMskICmp_BMask_Mixed = 256, + FoldMskICmp_BMask_NotMixed = 512 +}; + +/// return the set of pattern classes (from MaskedICmpType) +/// that (icmp SCC (A & B), C) satisfies +static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, + ICmpInst::Predicate SCC) +{ + ConstantInt *ACst = dyn_cast<ConstantInt>(A); + ConstantInt *BCst = dyn_cast<ConstantInt>(B); + ConstantInt *CCst = dyn_cast<ConstantInt>(C); + bool icmp_eq = (SCC == ICmpInst::ICMP_EQ); + bool icmp_abit = (ACst != 0 && !ACst->isZero() && + ACst->getValue().isPowerOf2()); + bool icmp_bbit = (BCst != 0 && !BCst->isZero() && + BCst->getValue().isPowerOf2()); + unsigned result = 0; + if (CCst != 0 && CCst->isZero()) { + // if C is zero, then both A and B qualify as mask + result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_AMask_Mixed | + FoldMskICmp_BMask_Mixed) + : (FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_AMask_NotMixed | + FoldMskICmp_BMask_NotMixed)); + if (icmp_abit) + result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes | + FoldMskICmp_AMask_NotMixed) + : (FoldMskICmp_AMask_AllOnes | + FoldMskICmp_AMask_Mixed)); + if (icmp_bbit) + result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes | + FoldMskICmp_BMask_NotMixed) + : (FoldMskICmp_BMask_AllOnes | + FoldMskICmp_BMask_Mixed)); + return result; + } + if (A == C) { + result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes | + FoldMskICmp_AMask_Mixed) + : (FoldMskICmp_AMask_NotAllOnes | + FoldMskICmp_AMask_NotMixed)); + if (icmp_abit) + result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_AMask_NotMixed) + : (FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_AMask_Mixed)); + } + else if (ACst != 0 && CCst != 0 && + ConstantExpr::getAnd(ACst, CCst) == CCst) { + result |= (icmp_eq ? FoldMskICmp_AMask_Mixed + : FoldMskICmp_AMask_NotMixed); + } + if (B == C) + { + result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes | + FoldMskICmp_BMask_Mixed) + : (FoldMskICmp_BMask_NotAllOnes | + FoldMskICmp_BMask_NotMixed)); + if (icmp_bbit) + result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_BMask_NotMixed) + : (FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_BMask_Mixed)); + } + else if (BCst != 0 && CCst != 0 && + ConstantExpr::getAnd(BCst, CCst) == CCst) { + result |= (icmp_eq ? FoldMskICmp_BMask_Mixed + : FoldMskICmp_BMask_NotMixed); + } + return result; +} + +/// foldLogOpOfMaskedICmpsHelper: +/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) +/// return the set of pattern classes (from MaskedICmpType) +/// that both LHS and RHS satisfy +static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, + Value*& B, Value*& C, + Value*& D, Value*& E, + ICmpInst *LHS, ICmpInst *RHS) { + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0; + if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0; + if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0; + // vectors are not (yet?) supported + if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; + + // Here comes the tricky part: + // LHS might be of the form L11 & L12 == X, X == L21 & L22, + // and L11 & L12 == L21 & L22. The same goes for RHS. + // Now we must find those components L** and R**, that are equal, so + // that we can extract the parameters A, B, C, D, and E for the canonical + // above. + Value *L1 = LHS->getOperand(0); + Value *L2 = LHS->getOperand(1); + Value *L11,*L12,*L21,*L22; + if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { + if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) + L21 = L22 = 0; + } + else { + if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) + return 0; + std::swap(L1, L2); + L21 = L22 = 0; + } + + Value *R1 = RHS->getOperand(0); + Value *R2 = RHS->getOperand(1); + Value *R11,*R12; + bool ok = false; + if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { + if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { + A = R11; D = R12; E = R2; ok = true; + } + else + if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + A = R12; D = R11; E = R2; ok = true; + } + } + if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) { + if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { + A = R11; D = R12; E = R1; ok = true; + } + else + if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + A = R12; D = R11; E = R1; ok = true; + } + else + return 0; + } + if (!ok) + return 0; + + if (L11 == A) { + B = L12; C = L2; + } + else if (L12 == A) { + B = L11; C = L2; + } + else if (L21 == A) { + B = L22; C = L1; + } + else if (L22 == A) { + B = L21; C = L1; + } + + unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC); + unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC); + return left_type & right_type; +} +/// foldLogOpOfMaskedICmps: +/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) +/// into a single (icmp(A & X) ==/!= Y) +static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, + ICmpInst::Predicate NEWCC, + llvm::InstCombiner::BuilderTy* Builder) { + Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; + unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS); + if (mask == 0) return 0; + + if (NEWCC == ICmpInst::ICMP_NE) + mask >>= 1; // treat "Not"-states as normal states + + if (mask & FoldMskICmp_Mask_AllZeroes) { + // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) + // -> (icmp eq (A & (B|D)), 0) + Value* newOr = Builder->CreateOr(B, D); + Value* newAnd = Builder->CreateAnd(A, newOr); + // we can't use C as zero, because we might actually handle + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // with B and D, having a single bit set + Value* zero = Constant::getNullValue(A->getType()); + return Builder->CreateICmp(NEWCC, newAnd, zero); + } + else if (mask & FoldMskICmp_BMask_AllOnes) { + // (icmp eq (A & B), B) & (icmp eq (A & D), D) + // -> (icmp eq (A & (B|D)), (B|D)) + Value* newOr = Builder->CreateOr(B, D); + Value* newAnd = Builder->CreateAnd(A, newOr); + return Builder->CreateICmp(NEWCC, newAnd, newOr); + } + else if (mask & FoldMskICmp_AMask_AllOnes) { + // (icmp eq (A & B), A) & (icmp eq (A & D), A) + // -> (icmp eq (A & (B&D)), A) + Value* newAnd1 = Builder->CreateAnd(B, D); + Value* newAnd = Builder->CreateAnd(A, newAnd1); + return Builder->CreateICmp(NEWCC, newAnd, A); + } + else if (mask & FoldMskICmp_BMask_Mixed) { + // (icmp eq (A & B), C) & (icmp eq (A & D), E) + // We already know that B & C == C && D & E == E. + // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of + // C and E, which are shared by both the mask B and the mask D, don't + // contradict, then we can transform to + // -> (icmp eq (A & (B|D)), (C|E)) + // Currently, we only handle the case of B, C, D, and E being constant. + ConstantInt *BCst = dyn_cast<ConstantInt>(B); + if (BCst == 0) return 0; + ConstantInt *DCst = dyn_cast<ConstantInt>(D); + if (DCst == 0) return 0; + // we can't simply use C and E, because we might actually handle + // (icmp ne (A & B), B) & (icmp eq (A & D), D) + // with B and D, having a single bit set + + ConstantInt *CCst = dyn_cast<ConstantInt>(C); + if (CCst == 0) return 0; + if (LHS->getPredicate() != NEWCC) + CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) ); + ConstantInt *ECst = dyn_cast<ConstantInt>(E); + if (ECst == 0) return 0; + if (RHS->getPredicate() != NEWCC) + ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) ); + ConstantInt* MCst = dyn_cast<ConstantInt>( + ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst), + ConstantExpr::getXor(CCst, ECst)) ); + // if there is a conflict we should actually return a false for the + // whole construct + if (!MCst->isZero()) + return 0; + Value *newOr1 = Builder->CreateOr(B, D); + Value *newOr2 = ConstantExpr::getOr(CCst, ECst); + Value *newAnd = Builder->CreateAnd(A, newOr1); + return Builder->CreateICmp(NEWCC, newAnd, newOr2); + } + return 0; } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. @@ -451,6 +731,10 @@ return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } + + // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder)) + return V; // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); @@ -472,20 +756,52 @@ Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) --> - // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT - if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { - Value *Op1 = 0, *Op2 = 0; - ConstantInt *CI1 = 0, *CI2 = 0; - if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && - match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { - if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && - CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { - Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); - Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); - return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr); - } + + // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); + } + + // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + } + + // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 + // where CMAX is the all ones value for the truncated type, + // iff the lower bits of C2 and CA are zero. + if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) && + LHS->hasOneUse() && RHS->hasOneUse()) { + Value *V; + ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; + + // (trunc x) == C1 & (and x, CA) == C2 + if (match(Val2, m_Trunc(m_Value(V))) && + match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = RHSCst; + BigCst = LHSCst; + } + // (and x, CA) == C2 & (trunc x) == C1 + else if (match(Val, m_Trunc(m_Value(V))) && + match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = LHSCst; + BigCst = RHSCst; + } + + if (SmallCst && BigCst) { + unsigned BigBitSize = BigCst->getType()->getBitWidth(); + unsigned SmallBitSize = SmallCst->getType()->getBitWidth(); + + // Check that the low bits are zero. + APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); + if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) { + Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue()); + APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue(); + Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N); + return Builder->CreateICmp(LHSCC, NewAnd, NewVal); } } } @@ -500,7 +816,17 @@ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) return 0; - + + // Make a constant range that's the intersection of the two icmp ranges. + // If the intersection is empty, we know that the result is false. + ConstantRange LHSRange = + ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue()); + ConstantRange RHSRange = + ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue()); + + if (LHSRange.intersectWith(RHSRange).isEmptySet()) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + // We can't fold (ugt x, C) & (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) return 0; @@ -533,10 +859,6 @@ case ICmpInst::ICMP_EQ: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false - case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 @@ -560,7 +882,7 @@ case ICmpInst::ICMP_NE: if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1)); } break; // (X != 13 & X != 15) -> no change @@ -584,9 +906,6 @@ case ICmpInst::ICMP_SLT: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false - case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 @@ -712,12 +1031,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyAndInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + // (A|B)&(A|C) -> A|(B&C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) @@ -725,7 +1048,6 @@ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { const APInt &AndRHSMask = AndRHS->getValue(); - APInt NotAndRHS(~AndRHSMask); // Optimize a variety of ((val OP C1) & C2) combinations... if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { @@ -734,23 +1056,27 @@ switch (Op0I->getOpcode()) { default: break; case Instruction::Xor: - case Instruction::Or: + case Instruction::Or: { // If the mask is only needed on one incoming arm, push it up. if (!Op0I->hasOneUse()) break; + APInt NotAndRHS(~AndRHSMask); if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { // Not masking anything out for the LHS, move to RHS. - Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS); + Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); } if (!isa<Constant>(Op0RHS) && MaskedValueIsZero(Op0RHS, NotAndRHS)) { // Not masking anything out for the RHS, move to LHS. - Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS); + Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); } break; + } case Instruction::Add: // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 @@ -770,14 +1096,12 @@ // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS // has 1's for all bits that the subtraction with A might affect. - if (Op0I->hasOneUse()) { + if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) { uint32_t BitWidth = AndRHSMask.getBitWidth(); uint32_t Zeros = AndRHSMask.countLeadingZeros(); APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); - ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS); - if (!(A && A->isZero()) && // avoid infinite recursion. - MaskedValueIsZero(Op0LHS, Mask)) { + if (MaskedValueIsZero(Op0LHS, Mask)) { Value *NewNeg = Builder->CreateNeg(Op0RHS); return BinaryOperator::CreateAnd(NewNeg, AndRHS); } @@ -795,38 +1119,25 @@ } break; } - + if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) return Res; - } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) { - // If this is an integer truncation or change from signed-to-unsigned, and - // if the source is an and/or with immediate, transform it. This - // frequently occurs for bitfield accesses. - if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) { - if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) && - CastOp->getNumOperands() == 2) - if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){ - if (CastOp->getOpcode() == Instruction::And) { - // Change: and (cast (and X, C1) to T), C2 - // into : and (cast X to T), trunc_or_bitcast(C1)&C2 - // This will fold the two constants together, which may allow - // other simplifications. - Value *NewCast = Builder->CreateTruncOrBitCast( - CastOp->getOperand(0), I.getType()); - // trunc_or_bitcast(C1)&C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - C3 = ConstantExpr::getAnd(C3, AndRHS); - return BinaryOperator::CreateAnd(NewCast, C3); - } else if (CastOp->getOpcode() == Instruction::Or) { - // Change: and (cast (or X, C1) to T), C2 - // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) - // trunc(C1)&C2 - return ReplaceInstUsesWith(I, AndRHS); - } - } + } + + // If this is an integer truncation, and if the source is an 'and' with + // immediate, transform it. This frequently occurs for bitfield accesses. + { + Value *X = 0; ConstantInt *YC = 0; + if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) { + // Change: and (trunc (and X, YC) to T), C2 + // into : and (trunc X to T), trunc(YC) & C2 + // This will fold the two constants together, which may allow + // other simplifications. + Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk"); + Constant *C3 = ConstantExpr::getTrunc(YC, I.getType()); + C3 = ConstantExpr::getAnd(C3, AndRHS); + return BinaryOperator::CreateAnd(NewCast, C3); } } @@ -844,10 +1155,11 @@ if (Value *Op0NotVal = dyn_castNotVal(Op0)) if (Value *Op1NotVal = dyn_castNotVal(Op1)) if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal); + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); return BinaryOperator::CreateNot(Or); } - + { Value *A = 0, *B = 0, *C = 0, *D = 0; // (A|B) & ~(A&B) -> A^B @@ -862,26 +1174,31 @@ ((A == C && B == D) || (A == D && B == C))) return BinaryOperator::CreateXor(A, B); - if (Op0->hasOneUse() && - match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1) { // (A^B)&A -> A&(A^B) - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } else if (B == Op1) { // (A^B)&B -> B&(B^A) - cast<BinaryOperator>(Op0)->swapOperands(); - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); + // A&(A^B) => A & ~B + { + Value *tmpOp0 = Op0; + Value *tmpOp1 = Op1; + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1 || B == Op1 ) { + tmpOp1 = Op0; + tmpOp0 = Op1; + // Simplify below + } } - } - if (Op1->hasOneUse() && - match(Op1, m_Xor(m_Value(A), m_Value(B)))) { - if (B == Op0) { // B&(A^B) -> B&(B^A) - cast<BinaryOperator>(Op1)->swapOperands(); - std::swap(A, B); + if (tmpOp1->hasOneUse() && + match(tmpOp1, m_Xor(m_Value(A), m_Value(B)))) { + if (B == tmpOp0) { + std::swap(A, B); + } + // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if + // A is originally -1 (or a vector of -1 and undefs), then we enter + // an endless loop. By checking that A is non-constant we ensure that + // we will never get to the loop. + if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); } - if (A == Op0) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); } // (A&((~A)|B)) -> A&B @@ -908,7 +1225,7 @@ // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntOrIntVectorTy()) { @@ -917,7 +1234,7 @@ // Only do this if the casts both really cause code to be generated. if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { - Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp); + Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } @@ -944,7 +1261,8 @@ SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { Value *NewOp = - Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0)); + Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } @@ -1083,7 +1401,7 @@ /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. /// If so, insert the new bswap intrinsic and return it. Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { - const IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); + IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); if (!ITy || ITy->getBitWidth() % 16 || // ByteMask only allows up to 32-byte values. ITy->getBitWidth() > 32*8) @@ -1107,9 +1425,8 @@ for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) return 0; - const Type *Tys[] = { ITy }; Module *M = I.getParent()->getParent()->getParent(); - Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); return CallInst::Create(F, V); } @@ -1155,7 +1472,12 @@ return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } - + + // handle (roughly): + // (icmp ne (A & B), C) | (icmp ne (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder)) + return V; + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); @@ -1168,24 +1490,29 @@ Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) --> - // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT - if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { - Value *Op1 = 0, *Op2 = 0; - ConstantInt *CI1 = 0, *CI2 = 0; - if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && - match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { - if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && - CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { - Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); - Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); - return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr); - } - } + + // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + + // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); } } - + + // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) + // iff C2 + CA == C1. + if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) { + ConstantInt *AddCst; + if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst)))) + if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue()) + return Builder->CreateICmpULE(Val, LHSCst); + } + // From here on, we only handle: // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. if (Val != Val2) return 0; @@ -1233,7 +1560,7 @@ if (LHSCst == SubOne(RHSCst)) { // (X == 13 | X == 14) -> X-13 <u 2 Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); return Builder->CreateICmpULT(Add, AddCST); } @@ -1424,12 +1751,16 @@ } Instruction *InstCombiner::visitOr(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyOrInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + // (A&B)|(A&C) -> A&(B|C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) @@ -1476,8 +1807,8 @@ // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. if (match(Op0, m_Or(m_Value(), m_Value())) || match(Op1, m_Or(m_Value(), m_Value())) || - (match(Op0, m_Shift(m_Value(), m_Value())) && - match(Op1, m_Shift(m_Value(), m_Value())))) { + (match(Op0, m_LogicalShift(m_Value(), m_Value())) && + match(Op1, m_LogicalShift(m_Value(), m_Value())))) { if (Instruction *BSwap = MatchBSwap(I)) return BSwap; } @@ -1504,7 +1835,7 @@ Value *C = 0, *D = 0; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { - Value *V1 = 0, *V2 = 0, *V3 = 0; + Value *V1 = 0, *V2 = 0; C1 = dyn_cast<ConstantInt>(C); C2 = dyn_cast<ConstantInt>(D); if (C1 && C2) { // (A & C1)|(B & C2) @@ -1555,32 +1886,13 @@ (C3->getValue() & ~C1->getValue()) == 0 && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && (C4->getValue() & ~C2->getValue()) == 0) { - V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4)); + V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); return BinaryOperator::CreateAnd(V2, ConstantInt::get(B->getContext(), C1->getValue()|C2->getValue())); } } } - - // Check to see if we have any common things being and'ed. If so, find the - // terms for V1 & (V2|V3). - if (Op0->hasOneUse() || Op1->hasOneUse()) { - V1 = 0; - if (A == B) // (A & C)|(A & D) == A & (C|D) - V1 = A, V2 = C, V3 = D; - else if (A == D) // (A & C)|(B & A) == A & (B|C) - V1 = A, V2 = B, V3 = C; - else if (C == B) // (A & C)|(C & D) == C & (A|D) - V1 = C, V2 = A, V3 = D; - else if (C == D) // (A & C)|(B & C) == C & (A|B) - V1 = C, V2 = A, V3 = B; - - if (V1) { - Value *Or = Builder->CreateOr(V2, V3); - return BinaryOperator::CreateAnd(V1, Or); - } - } // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants. // Don't do this for vector select idioms, the code generator doesn't handle @@ -1633,7 +1945,8 @@ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { - Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0)); + Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } @@ -1643,10 +1956,44 @@ if (Value *Op0NotVal = dyn_castNotVal(Op0)) if (Value *Op1NotVal = dyn_castNotVal(Op1)) if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal); + Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); return BinaryOperator::CreateNot(And); } + // Canonicalize xor to the RHS. + if (match(Op0, m_Xor(m_Value(), m_Value()))) + std::swap(Op0, Op1); + + // A | ( A ^ B) -> A | B + // A | (~A ^ B) -> A | ~B + if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) { + if (Op0 == A || Op0 == B) + return BinaryOperator::CreateOr(A, B); + + if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(B, B->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(A, A->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + } + + // A | ~(A | B) -> A | ~B + // A | ~(A ^ B) -> A | ~B + if (match(Op1, m_Not(m_Value(A)))) + if (BinaryOperator *B = dyn_cast<BinaryOperator>(A)) + if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) && + Op1->hasOneUse() && (B->getOpcode() == Instruction::Or || + B->getOpcode() == Instruction::Xor)) { + Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) : + B->getOperand(0); + Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) if (Value *Res = FoldOrOfICmps(LHS, RHS)) @@ -1660,65 +2007,76 @@ // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { - if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVectorTy()) { - Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + CastInst *Op1C = dyn_cast<CastInst>(Op1); + if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? + Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); - if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && - // Only do this if the casts both really cause code to be - // generated. - ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && - ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { - Value *NewOp = Builder->CreateOr(Op0COp, Op1COp); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - - // If this is or(cast(icmp), cast(icmp)), try to fold this even if the - // cast is otherwise not optimizable. This happens for vector sexts. - if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) - if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) - if (Value *Res = FoldOrOfICmps(LHS, RHS)) - return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); - - // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the - // cast is otherwise not optimizable. This happens for vector sexts. - if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) - if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) - if (Value *Res = FoldOrOfFCmps(LHS, RHS)) - return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && + // Only do this if the casts both really cause code to be + // generated. + ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); } + } + } + + // or(sext(A), B) -> A ? -1 : B where A is an i1 + // or(A, sext(B)) -> B ? -1 : A where B is an i1 + if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); + if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); + + // Note: If we've gotten to the point of visiting the outer OR, then the + // inner one couldn't be simplified. If it was a constant, then it won't + // be simplified by a later pass either, so we try swapping the inner/outer + // ORs in the hopes that we'll be able to simplify it this way. + // (X|C) | V --> (X|V) | C + if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) && + match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) { + Value *Inner = Builder->CreateOr(A, Op1); + Inner->takeName(Op0); + return BinaryOperator::CreateOr(Inner, C1); } return Changed ? &I : 0; } Instruction *InstCombiner::visitXor(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) { - if (isa<UndefValue>(Op0)) - // Handle undef ^ undef -> 0 special case. This is a common - // idiom (misuse). - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef - } + if (Value *V = SimplifyXorInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); - // xor X, X = 0 - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - + // (A&B)^(A&C) -> A&(B^C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; - if (I.getType()->isVectorTy()) - if (isa<ConstantAggregateZero>(Op1)) - return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X // Is this a ~ operation? if (Value *NotOp = dyn_castNotVal(&I)) { @@ -1731,7 +2089,8 @@ Op0I->swapOperands(); if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { Value *NotY = - Builder->CreateNot(Op0I->getOperand(1)); + Builder->CreateNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); if (Op0I->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(Op0NotVal, NotY); return BinaryOperator::CreateAnd(Op0NotVal, NotY); @@ -1742,9 +2101,9 @@ if (isFreeToInvert(Op0I->getOperand(0)) && isFreeToInvert(Op0I->getOperand(1))) { Value *NotX = - Builder->CreateNot(Op0I->getOperand(0)); + Builder->CreateNot(Op0I->getOperand(0), "notlhs"); Value *NotY = - Builder->CreateNot(Op0I->getOperand(1)); + Builder->CreateNot(Op0I->getOperand(1), "notrhs"); if (Op0I->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(NotX, NotY); return BinaryOperator::CreateAnd(NotX, NotY); @@ -1836,15 +2195,6 @@ return NV; } - if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 - if (X == Op1) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 - if (X == Op0) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); if (Op1I) { Value *A, *B; @@ -1857,10 +2207,6 @@ I.swapOperands(); // Simplified below. std::swap(Op0, Op1); } - } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // A^(A^B) == B - } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { - return ReplaceInstUsesWith(I, A); // A^(B^A) == B } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){ if (A == Op0) { // A^(A&B) -> A^(B&A) @@ -1883,10 +2229,6 @@ std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); - } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // (A^B)^A == B - } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { - return ReplaceInstUsesWith(I, A); // (B^A)^A == B } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ if (A == Op1) // (A&B)^A -> (B&A)^A @@ -1904,7 +2246,8 @@ Op0I->getOperand(1) == Op1I->getOperand(1) && (Op1I->hasOneUse() || Op1I->hasOneUse())) { Value *NewOp = - Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0)); + Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), + Op0I->getName()); return BinaryOperator::Create(Op1I->getOpcode(), NewOp, Op1I->getOperand(1)); } @@ -1923,29 +2266,8 @@ if ((A == C && B == D) || (A == D && B == C)) return BinaryOperator::CreateXor(A, B); } - - // (A & B)^(C & D) - if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && - match(Op0I, m_And(m_Value(A), m_Value(B))) && - match(Op1I, m_And(m_Value(C), m_Value(D)))) { - // (X & Y)^(X & Y) -> (Y^Z) & X - Value *X = 0, *Y = 0, *Z = 0; - if (A == C) - X = A, Y = B, Z = D; - else if (A == D) - X = A, Y = B, Z = C; - else if (B == C) - X = B, Y = A, Z = D; - else if (B == D) - X = B, Y = A, Z = C; - - if (X) { - Value *NewOp = Builder->CreateXor(Y, Z); - return BinaryOperator::CreateAnd(NewOp, X); - } - } } - + // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) @@ -1967,7 +2289,7 @@ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? - const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && // Only do this if the casts both really cause code to be generated. ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), @@ -1975,7 +2297,7 @@ ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), I.getType())) { Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), - Op1C->getOperand(0)); + Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineCalls.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7428478..eac794b 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,116 +12,27 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; /// getPromotedType - Return the specified type promoted as it would be to pass /// though a va_arg area. -static const Type *getPromotedType(const Type *Ty) { - if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { +static Type *getPromotedType(Type *Ty) { + if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { if (ITy->getBitWidth() < 32) return Type::getInt32Ty(Ty->getContext()); } return Ty; } -/// EnforceKnownAlignment - If the specified pointer points to an object that -/// we control, modify the object's alignment to PrefAlign. This isn't -/// often possible though. If alignment is important, a more reliable approach -/// is to simply align all global variables and allocation instructions to -/// their preferred alignment from the beginning. -/// -static unsigned EnforceKnownAlignment(Value *V, - unsigned Align, unsigned PrefAlign) { - - User *U = dyn_cast<User>(V); - if (!U) return Align; - - switch (Operator::getOpcode(U)) { - default: break; - case Instruction::BitCast: - return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - case Instruction::GetElementPtr: { - // If all indexes are zero, it is just the alignment of the base pointer. - bool AllZeroOperands = true; - for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) - if (!isa<Constant>(*i) || - !cast<Constant>(*i)->isNullValue()) { - AllZeroOperands = false; - break; - } - - if (AllZeroOperands) { - // Treat this like a bitcast. - return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - } - return Align; - } - case Instruction::Alloca: { - AllocaInst *AI = cast<AllocaInst>(V); - // If there is a requested alignment and if this is an alloca, round up. - if (AI->getAlignment() >= PrefAlign) - return AI->getAlignment(); - AI->setAlignment(PrefAlign); - return PrefAlign; - } - } - - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - // If there is a large requested alignment and we can, bump up the alignment - // of the global. - if (GV->isDeclaration()) return Align; - - if (GV->getAlignment() >= PrefAlign) - return GV->getAlignment(); - // We can only increase the alignment of the global if it has no alignment - // specified or if it is not assigned a section. If it is assigned a - // section, the global could be densely packed with other objects in the - // section, increasing the alignment could cause padding issues. - if (!GV->hasSection() || GV->getAlignment() == 0) - GV->setAlignment(PrefAlign); - return GV->getAlignment(); - } - - return Align; -} - -/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that -/// we can determine, return it, otherwise return 0. If PrefAlign is specified, -/// and it is more than the alignment of the ultimate object, see if we can -/// increase the alignment of the ultimate object, making this check succeed. -unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign) { - assert(V->getType()->isPointerTy() && - "GetOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; - APInt Mask = APInt::getAllOnesValue(BitWidth); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne); - unsigned TrailZ = KnownZero.countTrailingOnes(); - - // Avoid trouble with rediculously large TrailZ values, such as - // those computed from a null pointer. - TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - - unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); - - // LLVM doesn't support alignments larger than this currently. - Align = std::min(Align, +Value::MaximumAlignment); - - if (PrefAlign > Align) - Align = EnforceKnownAlignment(V, Align, PrefAlign); - - // We don't need to make any adjustment. - return Align; -} Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0)); - unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1)); + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -152,7 +63,7 @@ unsigned DstAddrSp = cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace(); - const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); + IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); @@ -164,18 +75,18 @@ // integer datatype. Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); if (StrippedDest != MI->getArgOperand(0)) { - const Type *SrcETy = cast<PointerType>(StrippedDest->getType()) + Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { // The SrcETy might be something like {{{double}}} or [1 x double]. Rip // down through these levels if so. while (!SrcETy->isSingleValueType()) { - if (const StructType *STy = dyn_cast<StructType>(SrcETy)) { + if (StructType *STy = dyn_cast<StructType>(SrcETy)) { if (STy->getNumElements() == 1) SrcETy = STy->getElementType(0); else break; - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { if (ATy->getNumElements() == 1) SrcETy = ATy->getElementType(); else @@ -199,10 +110,10 @@ Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); - Instruction *L = new LoadInst(Src, MI->isVolatile(), SrcAlign); - InsertNewInstBefore(L, *MI); - InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign), - *MI); + LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); + L->setAlignment(SrcAlign); + StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile()); + S->setAlignment(DstAlign); // Set the size of the copy to 0, it will be deleted on the next iteration. MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType())); @@ -210,7 +121,7 @@ } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); + unsigned Alignment = getKnownAlignment(MI->getDest(), TD); if (MI->getAlignment() < Alignment) { MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Alignment, false)); @@ -230,18 +141,21 @@ // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. + Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. Value *Dest = MI->getDest(); - Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); + unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); + Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); + Dest = Builder->CreateBitCast(Dest, NewDstPtrTy); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), - Dest, false, Alignment), *MI); + StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest, + MI->isVolatile()); + S->setAlignment(Alignment); // Set the size of the copy to 0, it will be deleted on the next iteration. MI->setLength(Constant::getNullValue(LenC->getType())); @@ -256,6 +170,19 @@ /// the heavy lifting. /// Instruction *InstCombiner::visitCallInst(CallInst &CI) { + if (isFreeCall(&CI)) + return visitFree(CI); + if (isMalloc(&CI)) + return visitMalloc(CI); + + // If the caller function is nounwind, mark the call as nounwind, even if the + // callee isn't. + if (CI.getParent()->getParent()->doesNotThrow() && + !CI.doesNotThrow()) { + CI.setDoesNotThrow(); + return &CI; + } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); if (!II) return visitCallSite(&CI); @@ -266,7 +193,8 @@ // memmove/cpy/set of zero bytes is a noop. if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { - if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); + if (NumBytes->isNullValue()) + return EraseInstFromFunction(CI); if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) if (CI->getZExtValue() == 1) { @@ -275,6 +203,10 @@ // alignment is sufficient. } } + + // No other transformations apply to volatile transfers. + if (MI->isVolatile()) + return 0; // If we have a memmove and the source operation is a constant global, // then the source and dest pointers can't alias, so we can change this @@ -284,10 +216,10 @@ if (GVSrc->isConstant()) { Module *M = CI.getParent()->getParent()->getParent(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[3] = { CI.getArgOperand(0)->getType(), - CI.getArgOperand(1)->getType(), - CI.getArgOperand(2)->getType() }; - CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); + Type *Tys[3] = { CI.getArgOperand(0)->getType(), + CI.getArgOperand(1)->getType(), + CI.getArgOperand(2)->getType() }; + CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys)); Changed = true; } } @@ -317,73 +249,73 @@ // We need target data for just about everything so depend on it. if (!TD) break; - const Type *ReturnTy = CI.getType(); - bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); + Type *ReturnTy = CI.getType(); + uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL; // Get to the real allocated thing and offset as fast as possible. Value *Op1 = II->getArgOperand(0)->stripPointerCasts(); - + + uint64_t Offset = 0; + uint64_t Size = -1ULL; + + // Try to look through constant GEPs. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) { + if (!GEP->hasAllConstantIndices()) break; + + // Get the current byte offset into the thing. Use the original + // operand in case we're looking through a bitcast. + SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); + Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); + + Op1 = GEP->getPointerOperand()->stripPointerCasts(); + + // Make sure we're not a constant offset from an external + // global. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) + if (!GV->hasDefinitiveInitializer()) break; + } + // If we've stripped down to a single global variable that we // can know the size of then just return that. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { if (GV->hasDefinitiveInitializer()) { Constant *C = GV->getInitializer(); - uint64_t GlobalSize = TD->getTypeAllocSize(C->getType()); - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize)); + Size = TD->getTypeAllocSize(C->getType()); } else { // Can't determine size of the GV. - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow); return ReplaceInstUsesWith(CI, RetVal); } } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { // Get alloca size. if (AI->getAllocatedType()->isSized()) { - uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + Size = TD->getTypeAllocSize(AI->getAllocatedType()); if (AI->isArrayAllocation()) { const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize()); if (!C) break; - AllocaSize *= C->getZExtValue(); + Size *= C->getZExtValue(); } - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize)); } - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) { - // Only handle constant GEPs here. - if (CE->getOpcode() != Instruction::GetElementPtr) break; - GEPOperator *GEP = cast<GEPOperator>(CE); - - // Make sure we're not a constant offset from an external - // global. - Value *Operand = GEP->getPointerOperand(); - Operand = Operand->stripPointerCasts(); - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) - if (!GV->hasDefinitiveInitializer()) break; - - // Get what we're pointing to and its size. - const PointerType *BaseType = - cast<PointerType>(Operand->getType()); - uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType()); - - // Get the current byte offset into the thing. Use the original - // operand in case we're looking through a bitcast. - SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end()); - const PointerType *OffsetType = - cast<PointerType>(GEP->getPointerOperand()->getType()); - uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size()); - - if (Size < Offset) { - // Out of bound reference? Negative index normalized to large - // index? Just return "I don't know". - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); - return ReplaceInstUsesWith(CI, RetVal); - } - - Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); - return ReplaceInstUsesWith(CI, RetVal); - } + } else if (CallInst *MI = extractMallocCall(Op1)) { + // Get allocation size. + Type* MallocType = getMallocAllocatedType(MI); + if (MallocType && MallocType->isSized()) + if (Value *NElems = getMallocArraySize(MI, TD, true)) + if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) + Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType); + } // Do not return "I don't know" here. Later optimization passes could // make it possible to evaluate objectsize to a constant. - break; + if (Size == -1ULL) + break; + + if (Size < Offset) { + // Out of bound reference? Negative index normalized to large + // index? Just return "I don't know". + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow)); + } + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset)); } case Intrinsic::bswap: // bswap(bswap(x)) -> x @@ -421,7 +353,9 @@ case Intrinsic::cttz: { // If all bits below the first known one are known zero, // this value is constant. - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + // FIXME: Try to simplify vectors of integers. + if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); @@ -438,7 +372,9 @@ case Intrinsic::ctlz: { // If all bits above the first known one are known zero, // this value is constant. - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + // FIXME: Try to simplify vectors of integers. + if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); @@ -454,7 +390,7 @@ break; case Intrinsic::uadd_with_overflow: { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt Mask = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0); @@ -472,25 +408,28 @@ if (LHSKnownNegative && RHSKnownNegative) { // The sign bit is set in both cases: this MUST overflow. // Create a simple add instruction, and insert it into the struct. - Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, &CI); - Worklist.Add(Add); + Value *Add = Builder->CreateAdd(LHS, RHS); + Add->takeName(&CI); Constant *V[] = { - UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext()) + UndefValue::get(LHS->getType()), + ConstantInt::getTrue(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } - + if (LHSKnownPositive && RHSKnownPositive) { // The sign bit is clear in both cases: this CANNOT overflow. // Create a simple add instruction, and insert it into the struct. - Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, &CI); - Worklist.Add(Add); + Value *Add = Builder->CreateNUWAdd(LHS, RHS); + Add->takeName(&CI); Constant *V[] = { UndefValue::get(LHS->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } } @@ -517,7 +456,8 @@ UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } @@ -537,12 +477,42 @@ UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; - case Intrinsic::umul_with_overflow: + case Intrinsic::umul_with_overflow: { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + + // Get the largest possible values for each operand. + APInt LHSMax = ~LHSKnownZero; + APInt RHSMax = ~RHSKnownZero; + + // If multiplying the maximum values does not overflow then we can turn + // this into a plain NUW mul. + bool Overflow; + LHSMax.umul_ov(RHSMax, Overflow); + if (!Overflow) { + Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow"); + Constant *V[] = { + UndefValue::get(LHS->getType()), + Builder->getFalse() + }; + Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V); + return InsertValueInst::Create(Struct, Mul, 0); + } + } // FALL THROUGH case Intrinsic::smul_with_overflow: // Canonicalize constants into the RHS. if (isa<Constant>(II->getArgOperand(0)) && @@ -568,48 +538,52 @@ UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; //case Intrinsic::ppc_altivec_lvx: //case Intrinsic::ppc_altivec_lvxl: - case Intrinsic::x86_sse_loadu_ps: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse2_loadu_dq: - // Turn PPC lvx -> load if the pointer is known aligned. - // Turn X86 loadups -> load if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), - PointerType::getUnqual(II->getType())); - return new LoadInst(Ptr); - } - break; + // // Turn PPC lvx -> load if the pointer is known aligned. + // if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { + // Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + // PointerType::getUnqual(II->getType())); + // return new LoadInst(Ptr); + // } + // break; //case Intrinsic::ppc_altivec_stvx: //case Intrinsic::ppc_altivec_stvxl: - // Turn stvx -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) { - const Type *OpPtrTy = - PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); - return new StoreInst(II->getArgOperand(0), Ptr); - } - break; + // // Turn stvx -> store if the pointer is known aligned. + // if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) { + // Type *OpPtrTy = + // PointerType::getUnqual(II->getArgOperand(0)->getType()); + // Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + // return new StoreInst(II->getArgOperand(0), Ptr); + // } + // break; case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { - const Type *OpPtrTy = + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); return new StoreInst(II->getArgOperand(1), Ptr); } break; - - case Intrinsic::x86_sse_cvttss2si: { - // These intrinsics only demands the 0th element of its input vector. If + + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: { + // These intrinsics only demand the 0th element of their input vectors. If // we can simplify the input based on that, do so now. unsigned VWidth = cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); @@ -622,56 +596,102 @@ } break; } - - //case Intrinsic::ppc_altivec_vperm: - // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. - if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { - assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); - - // Check that all of the elements are integer constants or undefs. - bool AllEltsOk = true; - for (unsigned i = 0; i != 16; ++i) { - if (!isa<ConstantInt>(Mask->getOperand(i)) && - !isa<UndefValue>(Mask->getOperand(i))) { - AllEltsOk = false; - break; - } - } - - if (AllEltsOk) { - // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), - Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), - Mask->getType()); - Value *Result = UndefValue::get(Op0->getType()); - - // Only extract each element once. - Value *ExtractedElts[32]; - memset(ExtractedElts, 0, sizeof(ExtractedElts)); - - for (unsigned i = 0; i != 16; ++i) { - if (isa<UndefValue>(Mask->getOperand(i))) - continue; - unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue(); - Idx &= 31; // Match the hardware behavior. - - if (ExtractedElts[Idx] == 0) { - ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, - ConstantInt::get(Type::getInt32Ty(II->getContext()), - Idx&15, false)); - } - - // Insert this value into the result vector. - Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - ConstantInt::get(Type::getInt32Ty(II->getContext()), - i, false)); - } - return CastInst::Create(Instruction::BitCast, Result, CI.getType()); - } + + + case Intrinsic::x86_sse41_pmovsxbw: + case Intrinsic::x86_sse41_pmovsxwd: + case Intrinsic::x86_sse41_pmovsxdq: + case Intrinsic::x86_sse41_pmovzxbw: + case Intrinsic::x86_sse41_pmovzxwd: + case Intrinsic::x86_sse41_pmovzxdq: { + // pmov{s|z}x ignores the upper half of their input vectors. + unsigned VWidth = + cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); + unsigned LowHalfElts = VWidth / 2; + APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts)); + APInt UndefElts(VWidth, 0); + if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), + InputDemandedElts, + UndefElts)) { + II->setArgOperand(0, TmpV); + return II; } break; + } + + //case Intrinsic::ppc_altivec_vperm: + // // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. + // if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { + // assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); + // + // // Check that all of the elements are integer constants or undefs. + // bool AllEltsOk = true; + // for (unsigned i = 0; i != 16; ++i) { + // if (!isa<ConstantInt>(Mask->getOperand(i)) && + // !isa<UndefValue>(Mask->getOperand(i))) { + // AllEltsOk = false; + // break; + // } + // } + // + // if (AllEltsOk) { + // // Cast the input vectors to byte vectors. + // Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), + // Mask->getType()); + // Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), + // Mask->getType()); + // Value *Result = UndefValue::get(Op0->getType()); + // + // // Only extract each element once. + // Value *ExtractedElts[32]; + // memset(ExtractedElts, 0, sizeof(ExtractedElts)); + // + // for (unsigned i = 0; i != 16; ++i) { + // if (isa<UndefValue>(Mask->getOperand(i))) + // continue; + // unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue(); + // Idx &= 31; // Match the hardware behavior. + // + // if (ExtractedElts[Idx] == 0) { + // ExtractedElts[Idx] = + // Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + // Builder->getInt32(Idx&15)); + // } + // + // // Insert this value into the result vector. + // Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], + // Builder->getInt32(i)); + // } + // return CastInst::Create(Instruction::BitCast, Result, CI.getType()); + // } + // } + // break; + + //case Intrinsic::arm_neon_vld1: + //case Intrinsic::arm_neon_vld2: + //case Intrinsic::arm_neon_vld3: + //case Intrinsic::arm_neon_vld4: + //case Intrinsic::arm_neon_vld2lane: + //case Intrinsic::arm_neon_vld3lane: + //case Intrinsic::arm_neon_vld4lane: + //case Intrinsic::arm_neon_vst1: + //case Intrinsic::arm_neon_vst2: + //case Intrinsic::arm_neon_vst3: + //case Intrinsic::arm_neon_vst4: + //case Intrinsic::arm_neon_vst2lane: + //case Intrinsic::arm_neon_vst3lane: + //case Intrinsic::arm_neon_vst4lane: { + // unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD); + // unsigned AlignArg = II->getNumArgOperands() - 1; + // ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg)); + // if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { + // II->setArgOperand(AlignArg, + // ConstantInt::get(Type::getInt32Ty(II->getContext()), + // MemAlign, false)); + // return II; + // } + // break; + //} case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can @@ -690,7 +710,7 @@ TerminatorInst *TI = II->getParent()->getTerminator(); bool CannotRemove = false; for (++BI; &*BI != TI; ++BI) { - if (isa<AllocaInst>(BI)) { + if (isa<AllocaInst>(BI) || isMalloc(BI)) { CannotRemove = true; break; } @@ -709,9 +729,11 @@ } } - // If the stack restore is in a return/unwind block and if there are no - // allocas or calls between the restore and the return, nuke the restore. - if (!CannotRemove && isa<ReturnInst>(TI)) + // If the stack restore is in a return, resume, or unwind block and if there + // are no allocas or calls between the restore and the return, nuke the + // restore. + if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) || + isa<UnwindInst>(TI))) return EraseInstFromFunction(CI); break; } @@ -720,6 +742,12 @@ return visitCallSite(II); } +// InvokeInst simplification +// +Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { + return visitCallSite(&II); +} + /// isSafeToEliminateVarargsCast - If this cast does not affect the value /// passed through the varargs area, we can eliminate the use of the cast. static bool isSafeToEliminateVarargsCast(const CallSite CS, @@ -735,9 +763,9 @@ if (!CS.paramHasAttr(ix, Attribute::ByVal)) return true; - const Type* SrcTy = + Type* SrcTy = cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); - const Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); + Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) return false; if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) @@ -745,16 +773,138 @@ return true; } +namespace { +class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls { + InstCombiner *IC; +protected: + void replaceCall(Value *With) { + NewInstruction = IC->ReplaceInstUsesWith(*CI, With); + } + bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { + if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) + return true; + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { + if (SizeCI->isAllOnesValue()) + return true; + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. + if (Len == 0) return false; + return SizeCI->getZExtValue() >= Len; + } + if (ConstantInt *Arg = dyn_cast<ConstantInt>( + CI->getArgOperand(SizeArgOp))) + return SizeCI->getZExtValue() >= Arg->getZExtValue(); + } + return false; + } +public: + InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { } + Instruction *NewInstruction; +}; +} // end anonymous namespace + +// Try to fold some different type of calls here. +// Currently we're only working with the checking functions, memcpy_chk, +// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, +// strcat_chk and strncat_chk. +Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { + if (CI->getCalledFunction() == 0) return 0; + + InstCombineFortifiedLibCalls Simplifier(this); + Simplifier.fold(CI, TD); + return Simplifier.NewInstruction; +} + +static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { + // Strip off at most one level of pointer casts, looking for an alloca. This + // is good enough in practice and simpler than handling any number of casts. + Value *Underlying = TrampMem->stripPointerCasts(); + if (Underlying != TrampMem && + (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem)) + return 0; + if (!isa<AllocaInst>(Underlying)) + return 0; + + IntrinsicInst *InitTrampoline = 0; + for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end(); + I != E; I++) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I); + if (!II) + return 0; + if (II->getIntrinsicID() == Intrinsic::init_trampoline) { + if (InitTrampoline) + // More than one init_trampoline writes to this value. Give up. + return 0; + InitTrampoline = II; + continue; + } + if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) + // Allow any number of calls to adjust.trampoline. + continue; + return 0; + } + + // No call to init.trampoline found. + if (!InitTrampoline) + return 0; + + // Check that the alloca is being used in the expected way. + if (InitTrampoline->getOperand(0) != TrampMem) + return 0; + + return InitTrampoline; +} + +static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, + Value *TrampMem) { + // Visit all the previous instructions in the basic block, and try to find a + // init.trampoline which has a direct path to the adjust.trampoline. + for (BasicBlock::iterator I = AdjustTramp, + E = AdjustTramp->getParent()->begin(); I != E; ) { + Instruction *Inst = --I; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::init_trampoline && + II->getOperand(0) == TrampMem) + return II; + if (Inst->mayWriteToMemory()) + return 0; + } + return 0; +} + +// Given a call to llvm.adjust.trampoline, find and return the corresponding +// call to llvm.init.trampoline if the call to the trampoline can be optimized +// to a direct call to a function. Otherwise return NULL. +// +static IntrinsicInst *FindInitTrampoline(Value *Callee) { + Callee = Callee->stripPointerCasts(); + IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); + if (!AdjustTramp || + AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) + return 0; + + Value *TrampMem = AdjustTramp->getOperand(0); + + if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem)) + return IT; + if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) + return IT; + return 0; +} + // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { bool Changed = false; - // If the callee is a constexpr cast of a function, attempt to move the cast - // to the arguments of the call/invoke. - if (transformConstExprCastCall(CS)) return 0; - + // If the callee is a pointer to a function, attempt to move any casts to the + // arguments of the call/invoke. Value *Callee = CS.getCalledValue(); + if (!isa<Function>(Callee) && transformConstExprCastCall(CS)) + return 0; if (Function *CalleeF = dyn_cast<Function>(Callee)) // If the call and callee calling conventions don't match, this call must @@ -771,10 +921,14 @@ // If OldCall dues not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!OldCall->getType()->isVoidTy()) - OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); + ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType())); if (isa<CallInst>(OldCall)) return EraseInstFromFunction(*OldCall); - + + // We cannot remove an invoke, because it would change the CFG, just + // change the callee to a null pointer. + cast<InvokeInst>(OldCall)->setCalledFunction( + Constant::getNullValue(CalleeF->getType())); return 0; } @@ -789,21 +943,24 @@ // If CS does not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!CS.getInstruction()->getType()->isVoidTy()) - CS.getInstruction()-> - replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType())); + ReplaceInstUsesWith(*CS.getInstruction(), + UndefValue::get(CS.getInstruction()->getType())); + if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { + // Don't break the CFG, insert a dummy cond branch. + BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), + ConstantInt::getTrue(Callee->getContext()), II); + } return EraseInstFromFunction(*CS.getInstruction()); } - if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee)) - if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0))) - if (In->getIntrinsicID() == Intrinsic::init_trampoline) - return transformCallThroughTrampoline(CS); + if (IntrinsicInst *II = FindInitTrampoline(Callee)) + return transformCallThroughTrampoline(CS, II); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); if (FTy->isVarArg()) { - int ix = FTy->getNumParams() + (ISA_INVOKE_INST(Callee) ? 3 : 1); + int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1); // See if we can optimize any arguments passed through the varargs area of // the call. for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), @@ -815,7 +972,23 @@ } } } - + + if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) { + // Inline asm calls cannot throw - mark them 'nounwind'. + CS.setDoesNotThrow(); + Changed = true; + } + + // Try to optimize the call if possible, we require TargetData for most of + // this. None of these calls are seen as possibly dead so go ahead and + // delete the instruction now. + if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { + Instruction *I = tryOptimizeCall(CI, TD); + // If we changed something return the result, etc. Otherwise let + // the fallthrough check. + if (I) return EraseInstFromFunction(*I); + } + return Changed ? CS.getInstruction() : 0; } @@ -823,12 +996,10 @@ // attempt to move the cast to the arguments of the call/invoke. // bool InstCombiner::transformConstExprCastCall(CallSite CS) { - if (!isa<ConstantExpr>(CS.getCalledValue())) return false; - ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue()); - if (CE->getOpcode() != Instruction::BitCast || - !isa<Function>(CE->getOperand(0))) + Function *Callee = + dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + if (Callee == 0) return false; - Function *Callee = cast<Function>(CE->getOperand(0)); Instruction *Caller = CS.getInstruction(); const AttrListPtr &CallerPAL = CS.getAttributes(); @@ -836,9 +1007,9 @@ // would cause a type conversion of one of our arguments, change this call to // be a direct call with arguments casted to the appropriate types. // - const FunctionType *FT = Callee->getFunctionType(); - const Type *OldRetTy = Caller->getType(); - const Type *NewRetTy = FT->getReturnType(); + FunctionType *FT = Callee->getFunctionType(); + Type *OldRetTy = Caller->getType(); + Type *NewRetTy = FT->getReturnType(); if (NewRetTy->isStructTy()) return false; // TODO: Handle multiple return values. @@ -864,6 +1035,19 @@ if (RAttrs & Attribute::typeIncompatible(NewRetTy)) return false; // Attribute not compatible with transformed value. } + + // If the callsite is an invoke instruction, and the return value is used by + // a PHI node in a successor, we cannot change the return type of the call + // because there is no place to put the cast instruction (without breaking + // the critical edge). Bail out in this case. + if (!Caller->use_empty()) + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) + for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); + UI != E; ++UI) + if (PHINode *PN = dyn_cast<PHINode>(*UI)) + if (PN->getParent() == II->getNormalDest() || + PN->getParent() == II->getUnwindDest()) + return false; } unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); @@ -871,15 +1055,28 @@ CallSite::arg_iterator AI = CS.arg_begin(); for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); - const Type *ActTy = (*AI)->getType(); + Type *ParamTy = FT->getParamType(i); + Type *ActTy = (*AI)->getType(); if (!CastInst::isCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. - if (CallerPAL.getParamAttributes(i + 1) - & Attribute::typeIncompatible(ParamTy)) + unsigned Attrs = CallerPAL.getParamAttributes(i + 1); + if (Attrs & Attribute::typeIncompatible(ParamTy)) return false; // Attribute not compatible with transformed value. + + // If the parameter is passed as a byval argument, then we have to have a + // sized type and the sized type has to have the same size as the old type. + if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) { + PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); + if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) + return false; + + Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); + if (TD->getTypeAllocSize(CurElTy) != + TD->getTypeAllocSize(ParamPTy->getElementType())) + return false; + } // Converting from one pointer type to another or between a pointer and an // integer of the same size is safe even if we do not have a body. @@ -891,10 +1088,19 @@ if (Callee->isDeclaration() && !isConvertible) return false; } - if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && - Callee->isDeclaration()) - return false; // Do not delete arguments unless we have a function body. + if (Callee->isDeclaration()) { + // Do not delete arguments unless we have a function body. + if (FT->getNumParams() < NumActualArgs && !FT->isVarArg()) + return false; + // If the callee is just a declaration, don't change the varargsness of the + // call. We don't want to introduce a varargs call where one doesn't + // already exist. + PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType()); + if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg()) + return false; + } + if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && !CallerPAL.isEmpty()) // In this case we have more arguments than the new function type, but we @@ -908,8 +1114,9 @@ return false; } + // Okay, we decided that this is a safe thing to do: go ahead and start - // inserting cast instructions as necessary... + // inserting cast instructions as necessary. std::vector<Value*> Args; Args.reserve(NumActualArgs); SmallVector<AttributeWithIndex, 8> attrVec; @@ -928,7 +1135,7 @@ AI = CS.arg_begin(); for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); + Type *ParamTy = FT->getParamType(i); if ((*AI)->getType() == ParamTy) { Args.push_back(*AI); } else { @@ -955,7 +1162,7 @@ } else { // Add all of the arguments in their promoted form to the arg list. for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { - const Type *PTy = getPromotedType((*AI)->getType()); + Type *PTy = getPromotedType((*AI)->getType()); if (PTy != (*AI)->getType()) { // Must promote to pass through va_arg area! Instruction::CastOps opcode = @@ -981,44 +1188,64 @@ const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), attrVec.end()); - Instruction *NC = CallInst::Create(Callee, Args.begin(), Args.end(), - Caller->getName(), Caller); - CallInst *CI = cast<CallInst>(Caller); - if (CI->isTailCall()) - cast<CallInst>(NC)->setTailCall(); - cast<CallInst>(NC)->setCallingConv(CI->getCallingConv()); - cast<CallInst>(NC)->setAttributes(NewCallerPAL); + Instruction *NC; + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + NC = Builder->CreateInvoke(Callee, II->getNormalDest(), + II->getUnwindDest(), Args); + NC->takeName(II); + cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv()); + cast<InvokeInst>(NC)->setAttributes(NewCallerPAL); + } else { + CallInst *CI = cast<CallInst>(Caller); + NC = Builder->CreateCall(Callee, Args); + NC->takeName(CI); + if (CI->isTailCall()) + cast<CallInst>(NC)->setTailCall(); + cast<CallInst>(NC)->setCallingConv(CI->getCallingConv()); + cast<CallInst>(NC)->setAttributes(NewCallerPAL); + } // Insert a cast of the return type as necessary. Value *NV = NC; if (OldRetTy != NV->getType() && !Caller->use_empty()) { if (!NV->getType()->isVoidTy()) { - Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, - OldRetTy, false); + Instruction::CastOps opcode = + CastInst::getCastOpcode(NC, false, OldRetTy, false); NV = NC = CastInst::Create(opcode, NC, OldRetTy); + NC->setDebugLoc(Caller->getDebugLoc()); - InsertNewInstBefore(NC, *Caller); + // If this is an invoke instruction, we should insert it after the first + // non-phi, instruction in the normal successor block. + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt(); + InsertNewInstBefore(NC, *I); + } else { + // Otherwise, it's a call, just insert cast right after the call. + InsertNewInstBefore(NC, *Caller); + } Worklist.AddUsersToWorkList(*Caller); } else { NV = UndefValue::get(Caller->getType()); } } - if (!Caller->use_empty()) - Caller->replaceAllUsesWith(NV); - + ReplaceInstUsesWith(*Caller, NV); + EraseInstFromFunction(*Caller); return true; } -// transformCallThroughTrampoline - Turn a call to a function created by the -// init_trampoline intrinsic into a direct call to the underlying function. +// transformCallThroughTrampoline - Turn a call to a function created by +// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the +// underlying function. // -Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { +Instruction * +InstCombiner::transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp) { Value *Callee = CS.getCalledValue(); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); const AttrListPtr &Attrs = CS.getAttributes(); // If the call already has the 'nest' attribute somewhere then give up - @@ -1026,17 +1253,17 @@ if (Attrs.hasAttrSomewhere(Attribute::Nest)) return 0; - IntrinsicInst *Tramp = - cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); + assert(Tramp && + "transformCallThroughTrampoline called with incorrect CallSite."); Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); - const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); - const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); + PointerType *NestFPTy = cast<PointerType>(NestF->getType()); + FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); const AttrListPtr &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; - const Type *NestTy = 0; + Type *NestTy = 0; Attributes NestAttr = Attribute::None; // Look for a parameter marked with the 'nest' attribute. @@ -1072,7 +1299,7 @@ // Add the chain argument and attributes. Value *NestVal = Tramp->getArgOperand(2); if (NestVal->getType() != NestTy) - NestVal = new BitCastInst(NestVal, NestTy, Caller); + NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); NewArgs.push_back(NestVal); NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); } @@ -1098,7 +1325,7 @@ // Handle this by synthesizing a new function type, equal to FTy // with the chain parameter inserted. - std::vector<const Type*> NewTypes; + std::vector<Type*> NewTypes; NewTypes.reserve(FTy->getNumParams()+1); // Insert the chain's type into the list of parameter types, which may @@ -1134,19 +1361,23 @@ const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), NewAttrs.end()); - Instruction *NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(), - Caller->getName(), Caller); - if (cast<CallInst>(Caller)->isTailCall()) - cast<CallInst>(NewCaller)->setTailCall(); - cast<CallInst>(NewCaller)-> - setCallingConv(cast<CallInst>(Caller)->getCallingConv()); - cast<CallInst>(NewCaller)->setAttributes(NewPAL); - - if (!Caller->getType()->isVoidTy()) - Caller->replaceAllUsesWith(NewCaller); - Caller->eraseFromParent(); - Worklist.Remove(Caller); - return 0; + Instruction *NewCaller; + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + NewCaller = InvokeInst::Create(NewCallee, + II->getNormalDest(), II->getUnwindDest(), + NewArgs); + cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); + cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); + } else { + NewCaller = CallInst::Create(NewCallee, NewArgs); + if (cast<CallInst>(Caller)->isTailCall()) + cast<CallInst>(NewCaller)->setTailCall(); + cast<CallInst>(NewCaller)-> + setCallingConv(cast<CallInst>(Caller)->getCallingConv()); + cast<CallInst>(NewCaller)->setAttributes(NewPAL); + } + + return NewCaller; } }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineCasts.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineCasts.cpp index c13fd2e..f10e48a 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; @@ -30,6 +31,14 @@ } if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { + // Cannot look past anything that might overflow. + OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val); + if (OBI && !OBI->hasNoUnsignedWrap()) { + Scale = 1; + Offset = 0; + return Val; + } + if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { if (I->getOpcode() == Instruction::Shl) { // This is a value scaled by '1 << the shift amt'. @@ -71,14 +80,14 @@ // This requires TargetData to get the alloca alignment and size information. if (!TD) return 0; - const PointerType *PTy = cast<PointerType>(CI.getType()); + PointerType *PTy = cast<PointerType>(CI.getType()); BuilderTy AllocaBuilder(*Builder); AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); // Get the type really allocated and the type casted to. - const Type *AllocElTy = AI.getAllocatedType(); - const Type *CastElTy = PTy->getElementType(); + Type *AllocElTy = AI.getAllocatedType(); + Type *CastElTy = PTy->getElementType(); if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); @@ -87,10 +96,8 @@ // If the allocation has multiple uses, only promote it if we are strictly // increasing the alignment of the resultant allocation. If we keep it the - // same, we open the door to infinite loops of various kinds. (A reference - // from a dbg.declare doesn't count as a use for this purpose.) - if (!AI.hasOneUse() && - CastElTyAlign == AllocElTyAlign) return 0; + // same, we open the door to infinite loops of various kinds. + if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); @@ -134,8 +141,8 @@ if (!AI.hasOneUse()) { // New is the allocation instruction, pointer typed. AI is the original // allocation instruction, also pointer typed. Thus, cast to use is BitCast. - Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType()); - AI.replaceAllUsesWith(NewCast); + Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); + ReplaceInstUsesWith(AI, NewCast); } return ReplaceInstUsesWith(CI, New); } @@ -145,7 +152,7 @@ /// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); @@ -198,7 +205,7 @@ } case Instruction::PHI: { PHINode *OPN = cast<PHINode>(I); - PHINode *NPN = PHINode::Create(Ty); + PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); NPN->addIncoming(V, OPN->getIncomingBlock(i)); @@ -213,7 +220,7 @@ } Res->takeName(I); - return InsertNewInstBefore(Res, *I); + return InsertNewInstWith(Res, *I); } @@ -223,12 +230,12 @@ isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction - const Type *DstTy, ///< The target type for the second cast instruction + Type *DstTy, ///< The target type for the second cast instruction TargetData *TD ///< The target data for pointer size ) { - const Type *SrcTy = CI->getOperand(0)->getType(); // A from above - const Type *MidTy = CI->getType(); // B from above + Type *SrcTy = CI->getOperand(0)->getType(); // A from above + Type *MidTy = CI->getType(); // B from above // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); @@ -254,7 +261,7 @@ /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, - const Type *Ty) { + Type *Ty) { // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; @@ -318,7 +325,7 @@ /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateTruncated(Value *V, const Type *Ty) { +static bool CanEvaluateTruncated(Value *V, Type *Ty) { // We can always evaluate constants in another type. if (isa<Constant>(V)) return true; @@ -326,7 +333,7 @@ Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - const Type *OrigTy = V->getType(); + Type *OrigTy = V->getType(); // If this is an extension from the dest type, we can eliminate it, even if it // has multiple uses. @@ -391,6 +398,11 @@ case Instruction::Trunc: // trunc(trunc(x)) -> trunc(x) return true; + case Instruction::ZExt: + case Instruction::SExt: + // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest + // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest + return true; case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); return CanEvaluateTruncated(SI->getTrueValue(), Ty) && @@ -424,7 +436,7 @@ return &CI; Value *Src = CI.getOperand(0); - const Type *DestTy = CI.getType(), *SrcTy = Src->getType(); + Type *DestTy = CI.getType(), *SrcTy = Src->getType(); // Attempt to truncate the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -449,6 +461,39 @@ Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } + + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. + Value *A = 0; ConstantInt *Cst = 0; + if (Src->hasOneUse() && + match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { + // We have three types to worry about here, the type of A, the source of + // the truncate (MidSize), and the destination of the truncate. We know that + // ASize < MidSize and MidSize > ResultSize, but don't know the relation + // between ASize and ResultSize. + unsigned ASize = A->getType()->getPrimitiveSizeInBits(); + + // If the shift amount is larger than the size of A, then the result is + // known to be zero because all the input bits got shifted out. + if (Cst->getZExtValue() >= ASize) + return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType())); + + // Since we're doing an lshr and a zero extend, and know that the shift + // amount is smaller than ASize, it is always safe to do the shift in A's + // type, then zero extend or truncate to the result. + Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), false); + } + + // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest + // type isn't non-native. + if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) && + ShouldChangeType(Src->getType(), CI.getType()) && + match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) { + Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr"); + return BinaryOperator::CreateAnd(NewTrunc, + ConstantExpr::getTrunc(Cst, CI.getType())); + } return 0; } @@ -472,13 +517,13 @@ Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits()-1); - In = Builder->CreateLShr(In, Sh); + In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); if (In->getType() != CI.getType()) In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One); + In = Builder->CreateXor(In, One, In->getName()+".not"); } return ReplaceInstUsesWith(CI, In); @@ -522,7 +567,8 @@ if (ShiftAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt)); + In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), + In->getName()+".lobit"); } if ((Op1CV != 0) == isNE) { // Toggle the low bit. @@ -532,8 +578,7 @@ if (CI.getType() == In->getType()) return ReplaceInstUsesWith(CI, In); - else - return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); + return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); } } } @@ -542,7 +587,7 @@ // It is also profitable to transform icmp eq into not(xor(A, B)) because that // may lead to additional simplifications. if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { - if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { + if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { uint32_t BitWidth = ITy->getBitWidth(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); @@ -600,7 +645,7 @@ /// clear the top bits anyway, doing this has no extra cost. /// /// This function works on both vectors and scalars. -static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) { +static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { BitsToClear = 0; if (isa<Constant>(V)) return true; @@ -714,7 +759,7 @@ return &CI; Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -766,7 +811,7 @@ if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); Constant *AndConst = ConstantInt::get(A->getType(), AndValue); - Value *And = Builder->CreateAnd(A, AndConst); + Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); return new ZExtInst(And, CI.getType()); } @@ -796,8 +841,8 @@ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { - Value *LCast = Builder->CreateZExt(LHS, CI.getType()); - Value *RCast = Builder->CreateZExt(RHS, CI.getType()); + Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); return BinaryOperator::Create(Instruction::Or, LCast, RCast); } } @@ -840,6 +885,102 @@ return 0; } +/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations +/// in order to eliminate the icmp. +Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { + Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); + ICmpInst::Predicate Pred = ICI->getPredicate(); + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative + // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive + if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) || + (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) { + + Value *Sh = ConstantInt::get(Op0->getType(), + Op0->getType()->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + if (In->getType() != CI.getType()) + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); + + if (Pred == ICmpInst::ICMP_SGT) + In = Builder->CreateNot(In, In->getName()+".not"); + return ReplaceInstUsesWith(CI, In); + } + + // If we know that only one bit of the LHS of the icmp can be set and we + // have an equality comparison with zero or a power of 2, we can transform + // the icmp and sext into bitwise/integer operations. + if (ICI->hasOneUse() && + ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ + unsigned BitWidth = Op1C->getType()->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne); + + APInt KnownZeroMask(~KnownZero); + if (KnownZeroMask.isPowerOf2()) { + Value *In = ICI->getOperand(0); + + // If the icmp tests for a known zero bit we can constant fold it. + if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) { + Value *V = Pred == ICmpInst::ICMP_NE ? + ConstantInt::getAllOnesValue(CI.getType()) : + ConstantInt::getNullValue(CI.getType()); + return ReplaceInstUsesWith(CI, V); + } + + if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) { + // sext ((x & 2^n) == 0) -> (x >> n) - 1 + // sext ((x & 2^n) != 2^n) -> (x >> n) - 1 + unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); + // Perform a right shift to place the desired bit in the LSB. + if (ShiftAmt) + In = Builder->CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // At this point "In" is either 1 or 0. Subtract 1 to turn + // {1, 0} -> {0, -1}. + In = Builder->CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); + } else { + // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 + // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 + unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); + // Perform a left shift to place the desired bit in the MSB. + if (ShiftAmt) + In = Builder->CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // Distribute the bit over the whole bit width. + In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), + BitWidth - 1), "sext"); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/); + } + } + } + + // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. + if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) { + if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) && + Op0->getType() == CI.getType()) { + Type *EltTy = VTy->getElementType(); + + // splat the shift constant to a constant vector. + Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit"); + return ReplaceInstUsesWith(CI, In); + } + } + + return 0; +} + /// CanEvaluateSExtd - Return true if we can take the specified value /// and return it as type Ty without inserting any new casts and without /// changing the value of the common low bits. This is used by code that tries @@ -848,7 +989,7 @@ /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateSExtd(Value *V, const Type *Ty) { +static bool CanEvaluateSExtd(Value *V, Type *Ty) { assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() && "Can't sign extend type to a smaller type"); // If this is a constant, it can be trivially promoted. @@ -923,7 +1064,7 @@ return &CI; Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -947,7 +1088,7 @@ // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt), + return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), ShAmt); } @@ -960,34 +1101,13 @@ // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); - Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt); + Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); return BinaryOperator::CreateAShr(Res, ShAmt); } - - - // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed - // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed - { - ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS; - if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) { - // sext (x <s 0) to i32 --> x>>s31 true if signbit set. - // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. - if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) || - (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) { - Value *Sh = ConstantInt::get(CmpLHS->getType(), - CmpLHS->getType()->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(CmpLHS, Sh); - if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); - - if (Pred == ICmpInst::ICMP_SGT) - In = Builder->CreateNot(In); - return ReplaceInstUsesWith(CI, In); - } - } - } - - + + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) + return transformSExtICmp(ICI, CI); + // If the input is a shl/ashr pair of a same constant, then this is a sign // extension from a smaller value. If we could trust arbitrary bitwidth // integers, we could turn this into a truncate to the smaller bit and then @@ -1011,7 +1131,7 @@ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); - A = Builder->CreateShl(A, ShAmtV); + A = Builder->CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } @@ -1073,7 +1193,7 @@ case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - const Type *SrcTy = OpI->getType(); + Type *SrcTy = OpI->getType(); Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); if (LHSTrunc->getType() != SrcTy && @@ -1097,7 +1217,8 @@ CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); if (Call && Call->getCalledFunction() && Call->getCalledFunction()->getName() == "sqrt" && - Call->getNumArgOperands() == 1) { + Call->getNumArgOperands() == 1 && + Call->hasOneUse()) { CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); if (Arg && Arg->getOpcode() == Instruction::FPExt && CI.getType()->isFloatTy() && @@ -1106,7 +1227,7 @@ Arg->getOperand(0)->getType()->isFloatTy()) { Function *Callee = Call->getCalledFunction(); Module *M = CI.getParent()->getParent()->getParent(); - Constant* SqrtfFunc = M->getOrInsertFunction("sqrtf", + Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", Callee->getAttributes(), Builder->getFloatTy(), Builder->getFloatTy(), @@ -1114,6 +1235,11 @@ CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), "sqrtfcall"); ret->setAttributes(Callee->getAttributes()); + + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. + ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); + EraseInstFromFunction(*Call); return ret; } } @@ -1226,7 +1352,7 @@ // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); - const Type *GEPIdxTy = + Type *GEPIdxTy = cast<PointerType>(OrigBase->getType())->getElementType(); SmallVector<Value*, 8> NewIndices; if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) { @@ -1234,9 +1360,8 @@ // and bitcast the result. This eliminates one bitcast, potentially // two. Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, - NewIndices.begin(), NewIndices.end()) : - Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end()); + Builder->CreateInBoundsGEP(OrigBase, NewIndices) : + Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); if (isa<BitCastInst>(CI)) @@ -1275,12 +1400,12 @@ /// replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. -static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, +static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. - const VectorType *SrcTy = cast<VectorType>(InVal->getType()); + VectorType *SrcTy = cast<VectorType>(InVal->getType()); if (SrcTy->getElementType() != DestTy->getElementType()) { // The input types don't need to be identical, but for now they must be the @@ -1300,7 +1425,7 @@ // size of the input. SmallVector<Constant*, 16> ShuffleMask; Value *V2; - const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low @@ -1323,27 +1448,219 @@ ConstantInt::get(Int32Ty, SrcElts)); } - Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size()); - return new ShuffleVectorInst(InVal, V2, Mask); + return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask)); } +static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) { + return Value % Ty->getPrimitiveSizeInBits() == 0; +} + +static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { + return Value / Ty->getPrimitiveSizeInBits(); +} + +/// CollectInsertionElements - V is a value which is inserted into a vector of +/// VecEltTy. Look through the value to see if we can decompose it into +/// insertions into the vector. See the example in the comment for +/// OptimizeIntegerToVectorInsertions for the pattern this handles. +/// The type of V is always a non-zero multiple of VecEltTy's size. +/// +/// This returns false if the pattern can't be matched or true if it can, +/// filling in Elements with the elements found here. +static bool CollectInsertionElements(Value *V, unsigned ElementIndex, + SmallVectorImpl<Value*> &Elements, + Type *VecEltTy) { + // Undef values never contribute useful bits to the result. + if (isa<UndefValue>(V)) return true; + + // If we got down to a value of the right type, we win, try inserting into the + // right element. + if (V->getType() == VecEltTy) { + // Inserting null doesn't actually insert any elements. + if (Constant *C = dyn_cast<Constant>(V)) + if (C->isNullValue()) + return true; + + // Fail if multiple elements are inserted into this slot. + if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) + return false; + + Elements[ElementIndex] = V; + return true; + } + + if (Constant *C = dyn_cast<Constant>(V)) { + // Figure out the # elements this provides, and bitcast it or slice it up + // as required. + unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(), + VecEltTy); + // If the constant is the size of a vector element, we just need to bitcast + // it to the right type so it gets properly inserted. + if (NumElts == 1) + return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), + ElementIndex, Elements, VecEltTy); + + // Okay, this is a constant that covers multiple elements. Slice it up into + // pieces and insert each element-sized piece into the vector. + if (!isa<IntegerType>(C->getType())) + C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(), + C->getType()->getPrimitiveSizeInBits())); + unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); + Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); + + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), + i*ElementSize)); + Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); + if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy)) + return false; + } + return true; + } + + if (!V->hasOneUse()) return false; + + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return false; + switch (I->getOpcode()) { + default: return false; // Unhandled case. + case Instruction::BitCast: + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy); + case Instruction::ZExt: + if (!isMultipleOfTypeSize( + I->getOperand(0)->getType()->getPrimitiveSizeInBits(), + VecEltTy)) + return false; + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy); + case Instruction::Or: + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy) && + CollectInsertionElements(I->getOperand(1), ElementIndex, + Elements, VecEltTy); + case Instruction::Shl: { + // Must be shifting by a constant that is a multiple of the element size. + ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; + unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); + + return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, + Elements, VecEltTy); + } + + } +} + + +/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we +/// may be doing shifts and ors to assemble the elements of the vector manually. +/// Try to rip the code out and replace it with insertelements. This is to +/// optimize code like this: +/// +/// %tmp37 = bitcast float %inc to i32 +/// %tmp38 = zext i32 %tmp37 to i64 +/// %tmp31 = bitcast float %inc5 to i32 +/// %tmp32 = zext i32 %tmp31 to i64 +/// %tmp33 = shl i64 %tmp32, 32 +/// %ins35 = or i64 %tmp33, %tmp38 +/// %tmp43 = bitcast i64 %ins35 to <2 x float> +/// +/// Into two insertelements that do "buildvector{%inc, %inc5}". +static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, + InstCombiner &IC) { + VectorType *DestVecTy = cast<VectorType>(CI.getType()); + Value *IntInput = CI.getOperand(0); + + SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); + if (!CollectInsertionElements(IntInput, 0, Elements, + DestVecTy->getElementType())) + return 0; + + // If we succeeded, we know that all of the element are specified by Elements + // or are zero if Elements has a null entry. Recast this as a set of + // insertions. + Value *Result = Constant::getNullValue(CI.getType()); + for (unsigned i = 0, e = Elements.size(); i != e; ++i) { + if (Elements[i] == 0) continue; // Unset element. + + Result = IC.Builder->CreateInsertElement(Result, Elements[i], + IC.Builder->getInt32(i)); + } + + return Result; +} + + +/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double +/// bitcast. The various long double bitcasts can't get in here. +static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ + Value *Src = CI.getOperand(0); + Type *DestTy = CI.getType(); + + // If this is a bitcast from int to float, check to see if the int is an + // extraction from a vector. + Value *VecInput = 0; + // bitcast(trunc(bitcast(somevector))) + if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && + isa<VectorType>(VecInput->getType())) { + VectorType *VecTy = cast<VectorType>(VecInput->getType()); + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from. + if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + } + } + + // bitcast(trunc(lshr(bitcast(somevector), cst)) + ConstantInt *ShAmt = 0; + if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), + m_ConstantInt(ShAmt)))) && + isa<VectorType>(VecInput->getType())) { + VectorType *VecTy = cast<VectorType>(VecInput->getType()); + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && + ShAmt->getZExtValue() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from. + if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + unsigned Elt = ShAmt->getZExtValue() / DestWidth; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + } + } + return 0; +} Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, // otherwise just apply the common ones. Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(); - const Type *DestTy = CI.getType(); + Type *SrcTy = Src->getType(); + Type *DestTy = CI.getType(); // Get rid of casts from one type to the same type. These are useless and can // be replaced by the operand. if (DestTy == Src->getType()) return ReplaceInstUsesWith(CI, Src); - if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { - const PointerType *SrcPTy = cast<PointerType>(SrcTy); - const Type *DstElTy = DstPTy->getElementType(); - const Type *SrcElTy = SrcPTy->getElementType(); + if (PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { + PointerType *SrcPTy = cast<PointerType>(SrcTy); + Type *DstElTy = DstPTy->getElementType(); + Type *SrcElTy = SrcPTy->getElementType(); // If the address spaces don't match, don't eliminate the bitcast, which is // required for changing types. @@ -1374,12 +1691,16 @@ // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt); - return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), - ((Instruction*)NULL)); + return GetElementPtrInst::CreateInBounds(Src, Idxs); } } + + // Try to optimize int -> float bitcasts. + if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) + if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) + return I; - if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { + if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, @@ -1387,20 +1708,28 @@ // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - // If this is a cast from an integer to vector, check to see if the input - // is a trunc or zext of a bitcast from vector. If so, we can replace all - // the casts with a shuffle and (potentially) a bitcast. - if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){ - CastInst *SrcCast = cast<CastInst>(Src); - if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) - if (isa<VectorType>(BCIn->getOperand(0)->getType())) - if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), + if (isa<IntegerType>(SrcTy)) { + // If this is a cast from an integer to vector, check to see if the input + // is a trunc or zext of a bitcast from vector. If so, we can replace all + // the casts with a shuffle and (potentially) a bitcast. + if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) { + CastInst *SrcCast = cast<CastInst>(Src); + if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) + if (isa<VectorType>(BCIn->getOperand(0)->getType())) + if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), cast<VectorType>(DestTy), *this)) - return I; + return I; + } + + // If the input is an 'or' instruction, we may be doing shifts and ors to + // assemble the elements of the vector manually. Try to rip the code out + // and replace it with insertelements. + if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this)) + return ReplaceInstUsesWith(CI, V); } } - if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { + if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { Value *Elem = Builder->CreateExtractElement(Src,
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineCompares.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8fd3c58..bb1cbfa 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -13,7 +13,9 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" @@ -21,13 +23,17 @@ using namespace llvm; using namespace PatternMatch; +static ConstantInt *getOne(Constant *C) { + return ConstantInt::get(cast<IntegerType>(C->getType()), 1); +} + /// AddOne - Add one to a ConstantInt static Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C) { - return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); +static Constant *SubOne(Constant *C) { + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { @@ -37,13 +43,12 @@ static bool HasAddOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().slt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ult(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().sgt(In1->getValue()); + return Result->getValue().slt(In1->getValue()); } /// AddWithOverflow - Compute Result = In1+In2, returning true if the result @@ -52,7 +57,7 @@ Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getAdd(In1, In2); - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasAddOverflow(ExtractElement(Result, Idx), @@ -72,13 +77,13 @@ static bool HasSubOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().sgt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ugt(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().slt(In1->getValue()); + + return Result->getValue().sgt(In1->getValue()); } /// SubWithOverflow - Compute Result = In1-In2, returning true if the result @@ -87,7 +92,7 @@ Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getSub(In1, In2); - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasSubOverflow(ExtractElement(Result, Idx), @@ -123,9 +128,8 @@ case ICmpInst::ICMP_UGT: // True if LHS u> RHS and RHS == high-bit-mask - 1 TrueIfSigned = true; - return RHS->getValue() == - APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); - case ICmpInst::ICMP_UGE: + return RHS->isMaxValue(true); + case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; return RHS->getValue().isSignBit(); @@ -140,7 +144,7 @@ return (~CI->getValue() + 1).isPowerOf2(); } -/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a +/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a /// set of known zero and one bits, compute the maximum and minimum values that /// could have the specified known zero and known one bits, returning them in /// min/max. @@ -157,10 +161,10 @@ // bit if it is unknown. Min = KnownOne; Max = KnownOne|UnknownBits; - + if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.set(Min.getBitWidth()-1); - Max.clear(Max.getBitWidth()-1); + Min.setBit(Min.getBitWidth()-1); + Max.clearBit(Max.getBitWidth()-1); } } @@ -176,7 +180,7 @@ KnownZero.getBitWidth() == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); APInt UnknownBits = ~(KnownZero|KnownOne); - + // The minimum value is when the unknown bits are all zeros. Min = KnownOne; // The maximum value is when the unknown bits are all ones. @@ -198,10 +202,10 @@ CmpInst &ICI, ConstantInt *AndCst) { // We need TD information to know the pointer size unless this is inbounds. if (!GEP->isInBounds() && TD == 0) return 0; - + ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer()); if (Init == 0 || Init->getNumOperands() > 1024) return 0; - + // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. // @@ -216,31 +220,31 @@ // type they index. Collect the indices. This is typically for arrays of // structs. SmallVector<unsigned, 4> LaterIndices; - - const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); + + Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (Idx == 0) return 0; // Variable index. - + uint64_t IdxVal = Idx->getZExtValue(); if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. - - if (const StructType *STy = dyn_cast<StructType>(EltTy)) + + if (StructType *STy = dyn_cast<StructType>(EltTy)) EltTy = STy->getElementType(IdxVal); - else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { + else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { if (IdxVal >= ATy->getNumElements()) return 0; EltTy = ATy->getElementType(); } else { return 0; // Unknown type. } - + LaterIndices.push_back(IdxVal); } - + enum { Overdefined = -3, Undefined = -2 }; // Variables for our state machines. - + // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form // "i == 47 | i == 87", where 47 is the first index the condition is true for, // and 87 is the second (and last) index. FirstTrueElement is -2 when @@ -251,7 +255,7 @@ // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the // form "i != 47 & i != 87". Same state transitions as for true elements. int FirstFalseElement = Undefined, SecondFalseElement = Undefined; - + /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these /// define a state machine that triggers for ranges of values that the index /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. @@ -259,26 +263,25 @@ /// index in the range (inclusive). We use -2 for undefined here because we /// use relative comparisons and don't want 0-1 to match -1. int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; - + // MagicBitvector - This is a magic bitvector where we set a bit if the // comparison is true for element 'i'. If there are 64 elements or less in // the array, this will fully represent all the comparison results. uint64_t MagicBitvector = 0; - - + + // Scan the array and see if one of our patterns matches. Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { Constant *Elt = Init->getOperand(i); - + // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) - Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(), - LaterIndices.size()); - + Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); + // If the element is masked, handle it. if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); - + // Find out if the comparison would be true or false for the i'th element. Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, CompareRHS, TD); @@ -292,15 +295,15 @@ FalseRangeEnd = i; continue; } - + // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. if (!isa<ConstantInt>(C)) return 0; - + // Otherwise, we know if the comparison is true or false for this element, // update our state machines. bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); - + // State machine for single/double/range index comparison. if (IsTrueForElt) { // Update the TrueElement state machine. @@ -312,7 +315,7 @@ SecondTrueElement = i; else SecondTrueElement = Overdefined; - + // Update range state machine. if (TrueRangeEnd == (int)i-1) TrueRangeEnd = i; @@ -329,7 +332,7 @@ SecondFalseElement = i; else SecondFalseElement = Overdefined; - + // Update range state machine. if (FalseRangeEnd == (int)i-1) FalseRangeEnd = i; @@ -337,12 +340,12 @@ FalseRangeEnd = Overdefined; } } - - + + // If this element is in range, update our magic bitvector. if (i < 64 && IsTrueForElt) MagicBitvector |= 1ULL << i; - + // If all of our states become overdefined, bail out early. Since the // predicate is expensive, only check it every 8 elements. This is only // really useful for really huge arrays. @@ -362,20 +365,20 @@ if (!GEP->isInBounds() && Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); - + // If the comparison is only true for one or two elements, emit direct // comparisons. if (SecondTrueElement != Overdefined) { // None true -> false. if (FirstTrueElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext())); - + Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); - + // True for one element -> 'i == 47'. if (SecondTrueElement == Undefined) return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); - + // True for two elements -> 'i == 47 | i == 72'. Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); @@ -389,36 +392,36 @@ // None false -> true. if (FirstFalseElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext())); - + Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); // False for one element -> 'i != 47'. if (SecondFalseElement == Undefined) return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); - + // False for two elements -> 'i != 47 & i != 72'. Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } - + // If the comparison can be replaced with a range comparison for the elements // where it is true, emit the range check. if (TrueRangeEnd != Overdefined) { assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); - + // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). if (FirstTrueElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), TrueRangeEnd-FirstTrueElement+1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } - + // False range check. if (FalseRangeEnd != Overdefined) { assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); @@ -427,19 +430,19 @@ Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), FalseRangeEnd-FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } - - + + // If a 32-bit or 64-bit magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 if (Init->getNumOperands() <= 32 || (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) { - const Type *Ty; + Type *Ty; if (Init->getNumOperands() <= 32) Ty = Type::getInt32Ty(Init->getContext()); else @@ -449,7 +452,7 @@ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); } - + return 0; } @@ -463,12 +466,11 @@ /// to generate the first by knowing that pointer arithmetic doesn't overflow. /// /// If we can't emit an optimized form for this expression, this returns null. -/// -static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, - InstCombiner &IC) { +/// +static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { TargetData &TD = *IC.getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - + // Check to see if this gep only has a single variable index. If so, and if // any constant indices are a multiple of its scale, then we can compute this // in terms of the scale of the variable index. For example, if the GEP @@ -480,9 +482,9 @@ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); @@ -493,33 +495,33 @@ break; } } - + // If there are no variable indices, we must have a constant offset, just // evaluate it the general way. if (i == e) return 0; - + Value *VariableIdx = GEP->getOperand(i); // Determine the scale factor of the variable element. For example, this is // 4 if the variable index is into an array of i32. uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); - + // Verify that there are no other variable indices. If so, emit the hard way. for (++i, ++GTI; i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (!CI) return 0; - + // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); Offset += Size*CI->getSExtValue(); } } - + // Okay, we know we have a single variable index, which must be a // pointer/array/vector index. If there is no offset, life is simple, return // the index. @@ -528,19 +530,20 @@ // Cast to intptrty in case a truncation occurs. If an extension is needed, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. - if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) - VariableIdx = new TruncInst(VariableIdx, - TD.getIntPtrType(VariableIdx->getContext()), &I); + if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); + } return VariableIdx; } - + // Otherwise, there is an index. The computation we will do will be modulo // the pointer size, so get it. uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - + Offset &= PtrSizeMask; VariableScale &= PtrSizeMask; - + // To do this transformation, any constant index must be a multiple of the // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a @@ -548,14 +551,14 @@ int64_t NewOffs = Offset / (int64_t)VariableScale; if (Offset != NewOffs*(int64_t)VariableScale) return 0; - + // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) - VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, - true /*SExt*/, &I); + VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, + true /*Signed*/); Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, &I); + return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); } /// FoldGEPICmp - Fold comparisons between a GEP instruction and something @@ -573,8 +576,8 @@ // This transformation (ignoring the base and scales) is valid because we // know pointers can't overflow since the gep is inbounds. See if we can // output an optimized form. - Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); - + Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); + // If not, synthesize the offset the hard way. if (Offset == 0) Offset = EmitGEPOffset(GEPLHS); @@ -627,6 +630,7 @@ if (AllZeros) return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); + bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { // If the GEPs only differ by one index, compare it. unsigned NumDifferences = 0; // Keep track of # differences. @@ -649,7 +653,7 @@ ConstantInt::get(Type::getInt1Ty(I.getContext()), ICmpInst::isTrueWhenEqual(Cond))); - else if (NumDifferences == 1) { + else if (NumDifferences == 1 && GEPsInBounds) { Value *LHSV = GEPLHS->getOperand(DiffOperand); Value *RHSV = GEPRHS->getOperand(DiffOperand); // Make sure we do a signed comparison here. @@ -660,6 +664,7 @@ // Only lower this if the icmp is the only user of the GEP or if we expect // the result to fold to a constant! if (TD && + GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) @@ -682,7 +687,7 @@ bool isTrue = ICmpInst::isTrueWhenEqual(Pred); return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); } - + // (X+4) == X -> false. if (Pred == ICmpInst::ICMP_EQ) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); @@ -691,40 +696,25 @@ if (Pred == ICmpInst::ICMP_NE) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); - // If this is an instruction (as opposed to constantexpr) get NUW/NSW info. - bool isNUW = false, isNSW = false; - if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) { - isNUW = Add->hasNoUnsignedWrap(); - isNSW = Add->hasNoSignedWrap(); - } - // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, - // so the values can never be equal. Similiarly for all other "or equals" + // so the values can never be equal. Similarly for all other "or equals" // operators. - + // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { - // If this is an NUW add, then this is always false. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); - - Value *R = + Value *R = ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); return new ICmpInst(ICmpInst::ICMP_UGT, X, R); } - + // (X+1) >u X --> X <u (0-1) --> X != 255 // (X+2) >u X --> X <u (0-2) --> X <u 254 // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 - if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { - // If this is an NUW add, then this is always true. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); - } - + unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); ConstantInt *SMax = ConstantInt::get(X->getContext(), APInt::getSignedMaxValue(BitWidth)); @@ -735,31 +725,16 @@ // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always false, if negative, this is always true. - if (isNSW) { - bool isTrue = CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); - } - + // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 - - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always true, if negative, this is always false. - if (isNSW) { - bool isTrue = !CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - + assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); @@ -771,15 +746,15 @@ ConstantInt *DivRHS) { ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); const APInt &CmpRHSV = CmpRHS->getValue(); - - // FIXME: If the operand types don't match the type of the divide + + // FIXME: If the operand types don't match the type of the divide // then don't attempt this transform. The code below doesn't have the // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) <s C2 produces different + // vice versa). This is because (x /s C1) <s C2 produces different // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even - // (x /u C1) <u C2. Simply casting the operands and result won't - // work. :( The if statement below tests that condition and bails - // if it finds it. + // (x /u C1) <u C2. Simply casting the operands and result won't + // work. :( The if statement below tests that condition and bails + // if it finds it. bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) return 0; @@ -787,27 +762,33 @@ return 0; // The ProdOV computation fails on divide by zero. if (DivIsSigned && DivRHS->isAllOnesValue()) return 0; // The overflow computation also screws up here - if (DivRHS->isOne()) - return 0; // Not worth bothering, and eliminates some funny cases - // with INT_MIN. + if (DivRHS->isOne()) { + // This eliminates some funny cases with INT_MIN. + ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. + return &ICI; + } // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI). By solving for X we can turn this into a range check - // instead of computing a divide. + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); // Determine if the product overflows by seeing if the product is // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. + // as in the LHS instruction that we're folding. bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; // Get the ICmp opcode ICmpInst::Predicate Pred = ICI.getPredicate(); + /// If the division is known to be exact, then there is no remainder from the + /// divide, so the covered range size is unit, otherwise it is the divisor. + ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; + // Figure out the interval that is being checked. For example, a comparison - // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). // Compute this interval based on the constants involved and the signedness of // the compare/divide. This computes a half-open interval, keeping track of // whether either value in the interval overflows. After analysis each @@ -815,38 +796,43 @@ // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. int LoOverflow = 0, HiOverflow = 0; Constant *LoBound = 0, *HiBound = 0; - + if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) LoBound = Prod; HiOverflow = LoOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false); + if (!HiOverflow) { + // If this is not an exact divide, then many values in the range collapse + // to the same result value. + HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); + } + } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) - LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS))); - HiBound = DivRHS; + LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); + HiBound = RangeSize; } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) HiOverflow = LoOverflow = ProdOV; if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true); + HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true); } else { // (X / pos) op neg // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) HiBound = AddOne(Prod); LoOverflow = HiOverflow = ProdOV ? -1 : 0; if (!LoOverflow) { - ConstantInt* DivNeg = - cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); + ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; - } + } } - } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. + } else if (DivRHS->isNegative()) { // Divisor is < 0. + if (DivI->isExact()) + RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (CmpRHSV == 0) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) - LoBound = AddOne(DivRHS); - HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); + LoBound = AddOne(RangeSize); + HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN @@ -856,14 +842,14 @@ HiBound = AddOne(Prod); HiOverflow = LoOverflow = ProdOV ? -1 : 0; if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0; + LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; } else { // (X / neg) op neg LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) LoOverflow = HiOverflow = ProdOV; if (!HiOverflow) - HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true); + HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); } - + // Dividing by a negative swaps the condition. LT <-> GT Pred = ICmpInst::getSwappedPredicate(Pred); } @@ -880,9 +866,8 @@ if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); - return ReplaceInstUsesWith(ICI, - InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - true)); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); @@ -905,15 +890,102 @@ case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - else if (HiOverflow == -1) // High bound less than input range. + if (HiOverflow == -1) // High bound less than input range. return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); } } +/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)". +Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, + ConstantInt *ShAmt) { + const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue(); + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + uint32_t TypeBits = CmpRHSV.getBitWidth(); + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + if (ShAmtVal >= TypeBits || ShAmtVal == 0) + return 0; + + if (!ICI.isEquality()) { + // If we have an unsigned comparison and an ashr, we can't simplify this. + // Similarly for signed comparisons with lshr. + if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) + return 0; + + // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv + // by a power of 2. Since we already have logic to simplify these, + // transform to div and then simplify the resultant comparison. + if (Shr->getOpcode() == Instruction::AShr && + (!Shr->isExact() || ShAmtVal == TypeBits - 1)) + return 0; + + // Revisit the shift (to delete it). + Worklist.Add(Shr); + + Constant *DivCst = + ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); + + Value *Tmp = + Shr->getOpcode() == Instruction::AShr ? + Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : + Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); + + ICI.setOperand(0, Tmp); + + // If the builder folded the binop, just return it. + BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); + if (TheDiv == 0) + return &ICI; + + // Otherwise, fold this div/compare. + assert(TheDiv->getOpcode() == Instruction::SDiv || + TheDiv->getOpcode() == Instruction::UDiv); + + Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst)); + assert(Res && "This div/cst should have folded!"); + return Res; + } + + + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + APInt Comp = CmpRHSV << ShAmtVal; + ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp); + if (Shr->getOpcode() == Instruction::LShr) + Comp = Comp.lshr(ShAmtVal); + else + Comp = Comp.ashr(ShAmtVal); + + if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + // Otherwise, check to see if the bits shifted out are known to be zero. + // If so, we can compare against the unshifted value: + // (X & 4) >> 1 == 2 --> (X & 4) == 4. + if (Shr->hasOneUse() && Shr->isExact()) + return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); + + if (Shr->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = ConstantInt::get(ICI.getContext(), Val); + + Value *And = Builder->CreateAnd(Shr->getOperand(0), + Mask, Shr->getName()+".mask"); + return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); + } + return 0; +} + /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". /// @@ -921,7 +993,7 @@ Instruction *LHSI, ConstantInt *RHS) { const APInt &RHSV = RHS->getValue(); - + switch (LHSI->getOpcode()) { case Instruction::Trunc: if (ICI.isEquality() && LHSI->hasOneUse()) { @@ -932,19 +1004,18 @@ APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); - + // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { // Pull in the high bits from known-ones set. - APInt NewRHS(RHS->getValue()); - NewRHS.zext(SrcBits); + APInt NewRHS = RHS->getValue().zext(SrcBits); NewRHS |= KnownOne; return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantInt::get(ICI.getContext(), NewRHS)); } } break; - + case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { // If this is a comparison that tests the signbit (X < 0) or (x > -1), @@ -952,21 +1023,21 @@ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { Value *CompareVal = LHSI->getOperand(0); - + // If the sign bit of the XorCST is not set, there is no change to // the operation, just stop using the Xor. - if (!XorCST->getValue().isNegative()) { + if (!XorCST->isNegative()) { ICI.setOperand(0, CompareVal); Worklist.Add(LHSI); return &ICI; } - + // Was the old condition true if the operand is positive? bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; - + // If so, the new one isn't. isTrueIfPositive ^= true; - + if (isTrueIfPositive) return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS)); @@ -988,7 +1059,7 @@ } // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) - if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { + if (!ICI.isEquality() && XorCST->isMaxValue(true)) { const APInt &NotSignBit = XorCST->getValue(); ICmpInst::Predicate Pred = ICI.isSigned() ? ICI.getUnsignedPredicate() @@ -1005,32 +1076,42 @@ if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && LHSI->getOperand(0)->hasOneUse()) { ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1)); - + // If the LHS is an AND of a truncating cast, we can widen the // and/compare to be the input width without changing the value // produced, eliminating a cast. if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { // We can do this transformation if either the AND constant does not - // have its sign bit set or if it is an equality comparison. + // have its sign bit set or if it is an equality comparison. // Extending a relational comparison when we're checking the sign // bit would not work. - if (Cast->hasOneUse() && - (ICI.isEquality() || - (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { - uint32_t BitWidth = - cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth(); - APInt NewCST = AndCST->getValue(); - NewCST.zext(BitWidth); - APInt NewCI = RHSV; - NewCI.zext(BitWidth); - Value *NewAnd = + if (ICI.isEquality() || + (!AndCST->isNegative() && RHSV.isNonNegative())) { + Value *NewAnd = Builder->CreateAnd(Cast->getOperand(0), - ConstantInt::get(ICI.getContext(), NewCST)); + ConstantExpr::getZExt(AndCST, Cast->getSrcTy())); + NewAnd->takeName(LHSI); return new ICmpInst(ICI.getPredicate(), NewAnd, - ConstantInt::get(ICI.getContext(), NewCI)); + ConstantExpr::getZExt(RHS, Cast->getSrcTy())); } } - + + // If the LHS is an AND of a zext, and we have an equality compare, we can + // shrink the and/compare to the smaller type, eliminating the cast. + if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) { + IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); + // Make sure we don't compare the upper bits, SimplifyDemandedBits + // should fold the icmp to true/false in that case. + if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { + Value *NewAnd = + Builder->CreateAnd(Cast->getOperand(0), + ConstantExpr::getTrunc(AndCST, Ty)); + NewAnd->takeName(LHSI); + return new ICmpInst(ICI.getPredicate(), NewAnd, + ConstantExpr::getTrunc(RHS, Ty)); + } + } + // If this is: (X >> C1) & C2 != C3 (where any shift and any compare // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This // happens a LOT in code produced by the C front-end, for bitfield @@ -1038,12 +1119,12 @@ BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); if (Shift && !Shift->isShift()) Shift = 0; - + ConstantInt *ShAmt; ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; - const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - const Type *AndTy = AndCST->getType(); // Type of the and. - + Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. + Type *AndTy = AndCST->getType(); // Type of the and. + // We can fold this as long as we can't shift unknown bits // into the mask. This can only happen with signed shift // rights, as they sign-extend. @@ -1054,20 +1135,20 @@ // of the bits shifted in could be tested after the mask. uint32_t TyBits = Ty->getPrimitiveSizeInBits(); int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); - + uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); - if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & + if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & AndCST->getValue()) == 0) CanFold = true; } - + if (CanFold) { Constant *NewCst; if (Shift->getOpcode() == Instruction::Shl) NewCst = ConstantExpr::getLShr(RHS, ShAmt); else NewCst = ConstantExpr::getShl(RHS, ShAmt); - + // Check to see if we are shifting out any of the bits being // compared. if (ConstantExpr::get(Shift->getOpcode(), @@ -1095,7 +1176,7 @@ } } } - + // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is // preferable because it allows the C<<Y expression to be hoisted out // of a loop if Y is invariant and X is not. @@ -1110,16 +1191,16 @@ // Insert a logical shift. NS = Builder->CreateLShr(AndCST, Shift->getOperand(1)); } - + // Compute X & (C << Y). - Value *NewAnd = - Builder->CreateAnd(Shift->getOperand(0), NS); - + Value *NewAnd = + Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); + ICI.setOperand(0, NewAnd); return &ICI; } } - + // Try to optimize things like "A[i]&42 == 0" to index computations. if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) { if (GetElementPtrInst *GEP = @@ -1141,7 +1222,6 @@ if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 // -> and (icmp eq P, null), (icmp eq Q, null). - Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, Constant::getNullValue(P->getType())); Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, @@ -1155,19 +1235,19 @@ } break; } - + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); if (!ShAmt) break; - + uint32_t TypeBits = RHSV.getBitWidth(); - + // Check that the shift amount is in range. If not, don't perform // undefined shifts. When the shift is visited it will be // simplified. if (ShAmt->uge(TypeBits)) break; - + if (ICI.isEquality()) { // If we are comparing against bits always shifted out, the // comparison cannot succeed. @@ -1180,96 +1260,66 @@ ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } - + + // If the shift is NUW, then it is just shifting out zeros, no need for an + // AND. + if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap()) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); Constant *Mask = - ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, + ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); - + Value *And = - Builder->CreateAnd(LHSI->getOperand(0),Mask); + Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, - ConstantInt::get(ICI.getContext(), - RHSV.lshr(ShAmtVal))); + ConstantExpr::getLShr(RHS, ShAmt)); } } - + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. bool TrueIfSigned = false; if (LHSI->hasOneUse() && isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { // (X << 31) <s 0 --> (X&1) != 0 - Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) << - (TypeBits-ShAmt->getZExtValue()-1)); + Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), + APInt::getOneBitSet(TypeBits, + TypeBits-ShAmt->getZExtValue()-1)); Value *And = - Builder->CreateAnd(LHSI->getOperand(0), Mask); + Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, And, Constant::getNullValue(And->getType())); } break; } - + case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) case Instruction::AShr: { - // Only handle equality comparisons of shift-by-constant. - ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); - if (!ShAmt || !ICI.isEquality()) break; + // Handle equality comparisons of shift-by-constant. + BinaryOperator *BO = cast<BinaryOperator>(LHSI); + if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { + if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) + return Res; + } - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - uint32_t TypeBits = RHSV.getBitWidth(); - if (ShAmt->uge(TypeBits)) - break; - - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - APInt Comp = RHSV << ShAmtVal; - if (LHSI->getOpcode() == Instruction::LShr) - Comp = Comp.lshr(ShAmtVal); - else - Comp = Comp.ashr(ShAmtVal); - - if (Comp != RHSV) { // Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), - IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } - - // Otherwise, check to see if the bits shifted out are known to be zero. - // If so, we can compare against the unshifted value: - // (X & 4) >> 1 == 2 --> (X & 4) == 4. - if (LHSI->hasOneUse() && - MaskedValueIsZero(LHSI->getOperand(0), - APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantExpr::getShl(RHS, ShAmt)); - } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(ICI.getContext(), Val); - - Value *And = Builder->CreateAnd(LHSI->getOperand(0), - Mask); - return new ICmpInst(ICI.getPredicate(), And, - ConstantExpr::getShl(RHS, ShAmt)); + // Handle exact shr's. + if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { + if (RHSV.isMinValue()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); } break; } - + case Instruction::SDiv: case Instruction::UDiv: // Fold: icmp pred ([us]div X, C1), C2 -> range test - // Fold this div into the comparison, producing a range check. - // Determine, based on the divide type, what the range is being - // checked. If there is an overflow on the low or high side, remember + // Fold this div into the comparison, producing a range check. + // Determine, based on the divide type, what the range is being + // checked. If there is an overflow on the low or high side, remember // it, otherwise compute the range [low, hi) bounding the new value. // See: InsertRangeTest above for the kinds of replacements possible. if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) @@ -1308,12 +1358,12 @@ } break; } - + // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. if (ICI.isEquality()) { bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - - // If the first operand is (add|sub|and|or|xor|rem) with a constant, and + + // If the first operand is (add|sub|and|or|xor|rem) with a constant, and // the second operand is a constant, simplify a bit. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { switch (BO->getOpcode()) { @@ -1323,7 +1373,8 @@ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); if (V.sgt(1) && V.isPowerOf2()) { Value *NewRem = - Builder->CreateURem(BO->getOperand(0), BO->getOperand(1)); + Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), + BO->getName()); return new ICmpInst(ICI.getPredicate(), NewRem, Constant::getNullValue(BO->getType())); } @@ -1339,12 +1390,12 @@ // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - + if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) + if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { + if (BO->hasOneUse()) { Value *Neg = Builder->CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(ICI.getPredicate(), BOp0, Neg); @@ -1354,18 +1405,27 @@ case Instruction::Xor: // For the xor case, we can xor two constants together, eliminating // the explicit xor. - if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), ConstantExpr::getXor(RHS, BOC)); - - // FALLTHROUGH - case Instruction::Sub: - // Replace (([sub|xor] A, B) != 0) with (A != B) - if (RHSV == 0) + } else if (RHSV == 0) { + // Replace ((xor A, B) != 0) with (A != B) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), BO->getOperand(1)); + } break; - + case Instruction::Sub: + // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants. + if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) { + if (BO->hasOneUse()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(1), + ConstantExpr::getSub(BOp0C, RHS)); + } else if (RHSV == 0) { + // Replace ((sub A, B) != 0) with (A != B) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + BO->getOperand(1)); + } + break; case Instruction::Or: // If bits are being or'd in that are not present in the constant we // are comparing against, then the comparison could never succeed! @@ -1373,11 +1433,11 @@ Constant *NotCI = ConstantExpr::getNot(RHS); if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) return ReplaceInstUsesWith(ICI, - ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); } break; - + case Instruction::And: if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { // If bits are being compared against that are and'd out, then the @@ -1386,27 +1446,31 @@ return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); - + // If we have ((X & C) == C), turn it into ((X & C) != 0). if (RHS == BOC && RHSV.isPowerOf2()) return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, LHSI, Constant::getNullValue(RHS->getType())); - + + // Don't perform the following transforms if the AND has multiple uses + if (!BO->hasOneUse()) + break; + // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 if (BOC->getValue().isSignBit()) { Value *X = BO->getOperand(0); Constant *Zero = Constant::getNullValue(X->getType()); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(pred, X, Zero); } - + // ((X & ~7) == 0) --> X < 8 if (RHSV == 0 && isHighOnes(BOC)) { Value *X = BO->getOperand(0); Constant *NegX = ConstantExpr::getNeg(BOC); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(pred, X, NegX); } @@ -1454,11 +1518,11 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); Value *LHSCIOp = LHSCI->getOperand(0); - const Type *SrcTy = LHSCIOp->getType(); - const Type *DestTy = LHSCI->getType(); + Type *SrcTy = LHSCIOp->getType(); + Type *DestTy = LHSCI->getType(); Value *RHSCIOp; - // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the + // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && TD->getPointerSizeInBits() == @@ -1476,7 +1540,7 @@ if (RHSOp) return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); } - + // The code below only handles extension cast instructions, so far. // Enforce this. if (LHSCI->getOpcode() != Instruction::ZExt && @@ -1489,9 +1553,9 @@ if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { // Not an extension from the same type? RHSCIOp = CI->getOperand(0); - if (RHSCIOp->getType() != LHSCIOp->getType()) + if (RHSCIOp->getType() != LHSCIOp->getType()) return 0; - + // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. if (CI->getOpcode() != LHSCI->getOpcode()) @@ -1536,57 +1600,181 @@ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); } - // The re-extended constant changed so the constant cannot be represented + // The re-extended constant changed so the constant cannot be represented // in the shorter type. Consequently, we cannot emit a simple comparison. + // All the cases that fold to true or false will have already been handled + // by SimplifyICmpInst, so only deal with the tricky case. - // First, handle some easy cases. We know the result cannot be equal at this - // point so handle the ICI.isEquality() cases - if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); + if (isSignedCmp || !isSignedExt) + return 0; // Evaluate the comparison for LT (we invert for GT below). LE and GE cases // should have been folded away previously and not enter in here. - Value *Result; - if (isSignedCmp) { - // We're performing a signed comparison. - if (cast<ConstantInt>(CI)->getValue().isNegative()) - Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false - else - Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true - } else { - // We're performing an unsigned comparison. - if (isSignedExt) { - // We're performing an unsigned comp with a sign extended value. - // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Result = Builder->CreateICmpSGT(LHSCIOp, NegOne); - } else { - // Unsigned extend & unsigned compare -> always true. - Result = ConstantInt::getTrue(ICI.getContext()); - } - } + + // We're performing an unsigned comp with a sign extended value. + // This is true if the input is >= 0. [aka >s -1] + Constant *NegOne = Constant::getAllOnesValue(SrcTy); + Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); // Finally, return the value computed. - if (ICI.getPredicate() == ICmpInst::ICMP_ULT || - ICI.getPredicate() == ICmpInst::ICMP_SLT) + if (ICI.getPredicate() == ICmpInst::ICMP_ULT) return ReplaceInstUsesWith(ICI, Result); - assert((ICI.getPredicate()==ICmpInst::ICMP_UGT || - ICI.getPredicate()==ICmpInst::ICMP_SGT) && - "ICmp should be folded!"); - if (Constant *CI = dyn_cast<Constant>(Result)) - return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI)); + assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); return BinaryOperator::CreateNot(Result); } +/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form: +/// I = icmp ugt (add (add A, B), CI2), CI1 +/// If this is of the form: +/// sum = a + b +/// if (sum+128 >u 255) +/// Then replace it with llvm.sadd.with.overflow.i8. +/// +static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, + ConstantInt *CI2, ConstantInt *CI1, + InstCombiner &IC) { + // The transformation we're trying to do here is to transform this into an + // llvm.sadd.with.overflow. To do this, we have to replace the original add + // with a narrower add, and discard the add-with-constant that is part of the + // range check (if we can't eliminate it, this isn't profitable). + // In order to eliminate the add-with-constant, the compare can be its only + // use. + Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); + if (!AddWithCst->hasOneUse()) return 0; + + // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. + if (!CI2->getValue().isPowerOf2()) return 0; + unsigned NewWidth = CI2->getValue().countTrailingZeros(); + if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; + + // The width of the new add formed is 1 more than the bias. + ++NewWidth; + + // Check to see that CI1 is an all-ones value with NewWidth bits. + if (CI1->getBitWidth() == NewWidth || + CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) + return 0; + + // In order to replace the original add with a narrower + // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant + // and truncates that discard the high bits of the add. Verify that this is + // the case. + Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0)); + for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end(); + UI != E; ++UI) { + if (*UI == AddWithCst) continue; + + // Only accept truncates for now. We would really like a nice recursive + // predicate like SimplifyDemandedBits, but which goes downwards the use-def + // chain to see which bits of a value are actually demanded. If the + // original add had another add which was then immediately truncated, we + // could still do the transformation. + TruncInst *TI = dyn_cast<TruncInst>(*UI); + if (TI == 0 || + TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; + } + + // If the pattern matches, truncate the inputs to the narrower type and + // use the sadd_with_overflow intrinsic to efficiently compute both the + // result and the overflow bit. + Module *M = I.getParent()->getParent()->getParent(); + + Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, + NewType); + + InstCombiner::BuilderTy *Builder = IC.Builder; + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. + Builder->SetInsertPoint(OrigAdd); + + Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); + Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); + CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); + Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); + Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); + + // The inner add was the result of the narrow add, zero extended to the + // wider type. Replace it with the result computed by the intrinsic. + IC.ReplaceInstUsesWith(*OrigAdd, ZExt); + + // The original icmp gets replaced with the overflow value. + return ExtractValueInst::Create(Call, 1, "sadd.overflow"); +} + +static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, + InstCombiner &IC) { + // Don't bother doing this transformation for pointers, don't do it for + // vectors. + if (!isa<IntegerType>(OrigAddV->getType())) return 0; + + // If the add is a constant expr, then we don't bother transforming it. + Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); + if (OrigAdd == 0) return 0; + + Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. + InstCombiner::BuilderTy *Builder = IC.Builder; + Builder->SetInsertPoint(OrigAdd); + + Module *M = I.getParent()->getParent()->getParent(); + Type *Ty = LHS->getType(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); + Value *Add = Builder->CreateExtractValue(Call, 0); + + IC.ReplaceInstUsesWith(*OrigAdd, Add); + + // The original icmp gets replaced with the overflow value. + return ExtractValueInst::Create(Call, 1, "uadd.overflow"); +} + +// DemandedBitsLHSMask - When performing a comparison against a constant, +// it is possible that not all the bits in the LHS are demanded. This helper +// method computes the mask that IS demanded. +static APInt DemandedBitsLHSMask(ICmpInst &I, + unsigned BitWidth, bool isSignCheck) { + if (isSignCheck) + return APInt::getSignBit(BitWidth); + + ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); + if (!CI) return APInt::getAllOnesValue(BitWidth); + const APInt &RHS = CI->getValue(); + + switch (I.getPredicate()) { + // For a UGT comparison, we don't care about any bits that + // correspond to the trailing ones of the comparand. The value of these + // bits doesn't impact the outcome of the comparison, because any value + // greater than the RHS must differ in a bit higher than these due to carry. + case ICmpInst::ICMP_UGT: { + unsigned trailingOnes = RHS.countTrailingOnes(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); + return ~lowBitsSet; + } + + // Similarly, for a ULT comparison, we don't care about the trailing zeros. + // Any value less than the RHS must differ in a higher bit because of carries. + case ICmpInst::ICMP_ULT: { + unsigned trailingZeros = RHS.countTrailingZeros(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); + return ~lowBitsSet; + } + + default: + return APInt::getAllOnesValue(BitWidth); + } + +} Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. @@ -1595,18 +1783,18 @@ std::swap(Op0, Op1); Changed = true; } - + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - - const Type *Ty = Op0->getType(); + + Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations if (Ty->isIntegerTy(1)) { switch (I.getPredicate()) { default: llvm_unreachable("Invalid icmp instruction!"); case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) - Value *Xor = Builder->CreateXor(Op0, Op1); + Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); return BinaryOperator::CreateNot(Xor); } case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B @@ -1616,52 +1804,72 @@ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult // FALL THROUGH case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B - Value *Not = Builder->CreateNot(Op0); + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); return BinaryOperator::CreateAnd(Not, Op1); } case ICmpInst::ICMP_SGT: std::swap(Op0, Op1); // Change icmp sgt -> icmp slt // FALL THROUGH case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B - Value *Not = Builder->CreateNot(Op1); + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); return BinaryOperator::CreateAnd(Not, Op0); } case ICmpInst::ICMP_UGE: std::swap(Op0, Op1); // Change icmp uge -> icmp ule // FALL THROUGH case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B - Value *Not = Builder->CreateNot(Op0); + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); return BinaryOperator::CreateOr(Not, Op1); } case ICmpInst::ICMP_SGE: std::swap(Op0, Op1); // Change icmp sge -> icmp sle // FALL THROUGH case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B - Value *Not = Builder->CreateNot(Op1); + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); return BinaryOperator::CreateOr(Not, Op0); } } } unsigned BitWidth = 0; - if (TD) - BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVectorTy()) + if (Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); + else if (TD) // Pointers require TD info to get their size. + BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); bool isSignBit = false; // See if we are doing a comparison with a constant. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { Value *A = 0, *B = 0; - + + // Match the following pattern, which is a common idiom when writing + // overflow-safe integer arithmetic function. The source performs an + // addition in wider type, and explicitly checks for overflow using + // comparisons against INT_MIN and INT_MAX. Simplify this by using the + // sadd_with_overflow intrinsic. + // + // TODO: This could probably be generalized to handle other overflow-safe + // operations if we worked out the formulas to compute the appropriate + // magic constants. + // + // sum = a + b + // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 + { + ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) + if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) + return Res; + } + // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) if (I.isEquality() && CI->isZero() && match(Op0, m_Sub(m_Value(A), m_Value(B)))) { // (icmp cond A B) if cond is equality return new ICmpInst(I.getPredicate(), A, B); } - + // If we have an icmp le or icmp ge instruction, turn it into the // appropriate icmp lt or icmp gt instruction. This allows us to rely on // them being folded in the code below. The SimplifyICmpInst code has @@ -1677,15 +1885,15 @@ return new ICmpInst(ICmpInst::ICMP_SLT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()+1)); case ICmpInst::ICMP_UGE: - assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_UGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); case ICmpInst::ICMP_SGE: - assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_SGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); } - + // If this comparison is a normal comparison, it demands all // bits, if it is a sign bit comparison, it only demands the sign bit. bool UnusedBit; @@ -1699,8 +1907,7 @@ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); if (SimplifyDemandedBits(I.getOperandUse(0), - isSignBit ? APInt::getSignBit(BitWidth) - : APInt::getAllOnesValue(BitWidth), + DemandedBitsLHSMask(I, BitWidth, isSignBit), Op0KnownZero, Op0KnownOne, 0)) return &I; if (SimplifyDemandedBits(I.getOperandUse(1), @@ -1730,28 +1937,94 @@ // that code below can assume that Min != Max. if (!isa<Constant>(Op0) && Op0Min == Op0Max) return new ICmpInst(I.getPredicate(), - ConstantInt::get(I.getContext(), Op0Min), Op1); + ConstantInt::get(Op0->getType(), Op0Min), Op1); if (!isa<Constant>(Op1) && Op1Min == Op1Max) return new ICmpInst(I.getPredicate(), Op0, - ConstantInt::get(I.getContext(), Op1Min)); + ConstantInt::get(Op1->getType(), Op1Min)); // Based on the range information we know about the LHS, see if we can - // simplify this comparison. For example, (x&4) < 8 is always true. + // simplify this comparison. For example, (x&4) < 8 is always true. switch (I.getPredicate()) { default: llvm_unreachable("Unknown icmp opcode!"); - case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_EQ: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + + // If all bits are known zero except for one, then we know at most one + // bit is set. If the comparison is against zero, then this is a check + // to see if *that* bit is set. + APInt Op0KnownZeroInverted = ~Op0KnownZero; + if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + // If the LHS is an AND with the same constant, look through it. + Value *LHS = 0; + ConstantInt *LHSC = 0; + if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || + LHSC->getValue() != Op0KnownZeroInverted) + LHS = Op0; + + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, + // then turn "((1 << x)&8) == 0" into "x != 3". + Value *X = 0; + if (match(LHS, m_Shl(m_One(), m_Value(X)))) { + unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), CmpVal)); + } + + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, + // then turn "((8 >>u x)&1) == 0" into "x != 3". + const APInt *CI; + if (Op0KnownZeroInverted == 1 && + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); + } + break; - case ICmpInst::ICMP_NE: + } + case ICmpInst::ICMP_NE: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + + // If all bits are known zero except for one, then we know at most one + // bit is set. If the comparison is against zero, then this is a check + // to see if *that* bit is set. + APInt Op0KnownZeroInverted = ~Op0KnownZero; + if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + // If the LHS is an AND with the same constant, look through it. + Value *LHS = 0; + ConstantInt *LHSC = 0; + if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || + LHSC->getValue() != Op0KnownZeroInverted) + LHS = Op0; + + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, + // then turn "((1 << x)&8) != 0" into "x == 3". + Value *X = 0; + if (match(LHS, m_Shl(m_One(), m_Value(X)))) { + unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), CmpVal)); + } + + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, + // then turn "((8 >>u x)&1) != 0" into "x == 3". + const APInt *CI; + if (Op0KnownZeroInverted == 1 && + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); + } + break; + } case ICmpInst::ICMP_ULT: if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { @@ -1767,9 +2040,9 @@ break; case ICmpInst::ICMP_UGT: if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -1786,9 +2059,9 @@ break; case ICmpInst::ICMP_SLT: if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { @@ -1799,9 +2072,9 @@ break; case ICmpInst::ICMP_SGT: if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -1814,30 +2087,30 @@ case ICmpInst::ICMP_SGE: assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_SLE: assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_UGE: assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_ULE: assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; } @@ -1865,9 +2138,9 @@ // See if we are doing a comparison between a constant and an instruction that // can be folded into the comparison. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - // Since the RHS is a ConstantInt (CI), if the left hand side is an - // instruction, see if that instruction also has constants so that the - // instruction can be folded into the icmp + // Since the RHS is a ConstantInt (CI), if the left hand side is an + // instruction, see if that instruction also has constants so that the + // instruction can be folded into the icmp if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) return Res; @@ -1889,7 +2162,7 @@ // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) + if (Instruction *NV = FoldOpIntoPhi(I)) return NV; break; case Instruction::Select: { @@ -1911,10 +2184,10 @@ if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { if (!Op1) Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), - RHSC); + RHSC, I.getName()); if (!Op2) Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), - RHSC); + RHSC, I.getName()); return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); } break; @@ -1922,7 +2195,7 @@ case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && - TD->getIntPtrType(RHSC->getContext()) == + TD->getIntPtrType(RHSC->getContext()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); @@ -1955,8 +2228,8 @@ // values. If the ptr->ptr cast can be stripped off both arguments, we do so // now. if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { - if (Op0->getType()->isPointerTy() && - (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { + if (Op0->getType()->isPointerTy() && + (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { // We keep moving the cast from the left operand over to the right // operand, where it can often be eliminated completely. Op0 = CI->getOperand(0); @@ -1978,7 +2251,7 @@ return new ICmpInst(I.getPredicate(), Op0, Op1); } } - + if (isa<CastInst>(Op0)) { // Handle the special case of: icmp (cast bool to X), <cst> // This comes up when you have code like @@ -1990,79 +2263,213 @@ if (Instruction *R = visitICmpInstWithCastAndCast(I)) return R; } - - // See if it's the same type of instruction on the left and right. - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() && - Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) { - switch (Op0I->getOpcode()) { - default: break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Xor: - if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b - return new ICmpInst(I.getPredicate(), Op0I->getOperand(0), - Op1I->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - if (CI->getValue().isSignBit()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - - if (CI->getValue().isMaxSignedValue()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - Pred = I.getSwappedPredicate(Pred); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - } - break; - case Instruction::Mul: - if (!I.isEquality()) - break; - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get(I.getContext(), - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - - AP.countTrailingZeros())); - Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); - return new ICmpInst(I.getPredicate(), And1, And2); - } + // Special logic for binary operators. + BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0); + BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1); + if (BO0 || BO1) { + CmpInst::Predicate Pred = I.getPredicate(); + bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; + if (BO0 && isa<OverflowingBinaryOperator>(BO0)) + NoOp0WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); + if (BO1 && isa<OverflowingBinaryOperator>(BO1)) + NoOp1WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); + + // Analyze the case when either Op0 or Op1 is an add instruction. + // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). + Value *A = 0, *B = 0, *C = 0, *D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Add) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Add) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. + if ((A == Op1 || B == Op1) && NoOp0WrapProblem) + return new ICmpInst(Pred, A == Op1 ? B : A, + Constant::getNullValue(Op1->getType())); + + // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + if ((C == Op0 || D == Op0) && NoOp1WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), + C == Op0 ? D : C); + + // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. + if (A && C && (A == C || A == D || B == C || B == D) && + NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) { + // Determine Y and Z in the form icmp (X+Y), (X+Z). + Value *Y = (A == C || A == D) ? B : A; + Value *Z = (C == A || C == B) ? D : C; + return new ICmpInst(Pred, Y, Z); + } + + // Analyze the case when either Op0 or Op1 is a sub instruction. + // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). + A = 0; B = 0; C = 0; D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Sub) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Sub) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. + if (A == Op1 && NoOp0WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); + + // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. + if (C == Op0 && NoOp1WrapProblem) + return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); + + // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow. + if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, A, C); + + // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. + if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, D, B); + + BinaryOperator *SRem = NULL; + // icmp (srem X, Y), Y + if (BO0 && BO0->getOpcode() == Instruction::SRem && + Op1 == BO0->getOperand(1)) + SRem = BO0; + // icmp Y, (srem X, Y) + else if (BO1 && BO1->getOpcode() == Instruction::SRem && + Op0 == BO1->getOperand(1)) + SRem = BO1; + if (SRem) { + // We don't check hasOneUse to avoid increasing register pressure because + // the value we use is the same value this instruction was already using. + switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) { + default: break; + case ICmpInst::ICMP_EQ: + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + case ICmpInst::ICMP_NE: + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), + Constant::getAllOnesValue(SRem->getType())); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), + Constant::getNullValue(SRem->getType())); + } + } + + if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && + BO0->hasOneUse() && BO1->hasOneUse() && + BO0->getOperand(1) == BO1->getOperand(1)) { + switch (BO0->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { + if (CI->getValue().isSignBit()) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); } - break; + + if (CI->isMaxValue(true)) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + Pred = I.getSwappedPredicate(Pred); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); + } } + break; + case Instruction::Mul: + if (!I.isEquality()) + break; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { + // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask + // Mask = -1 >> count-trailing-zeros(Cst). + if (!CI->isZero() && !CI->isOne()) { + const APInt &AP = CI->getValue(); + ConstantInt *Mask = ConstantInt::get(I.getContext(), + APInt::getLowBitsSet(AP.getBitWidth(), + AP.getBitWidth() - + AP.countTrailingZeros())); + Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); + Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); + return new ICmpInst(I.getPredicate(), And1, And2); + } + } + break; + case Instruction::UDiv: + case Instruction::LShr: + if (I.isSigned()) + break; + // fall-through + case Instruction::SDiv: + case Instruction::AShr: + if (!BO0->isExact() || !BO1->isExact()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + case Instruction::Shl: { + bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); + bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); + if (!NUW && !NSW) + break; + if (!NSW && I.isSigned()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + } } } } - - // ~x < ~y --> y < x + { Value *A, *B; - if (match(Op0, m_Not(m_Value(A))) && - match(Op1, m_Not(m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, A); + // ~x < ~y --> y < x + // ~x < cst --> ~cst < x + if (match(Op0, m_Not(m_Value(A)))) { + if (match(Op1, m_Not(m_Value(B)))) + return new ICmpInst(I.getPredicate(), B, A); + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) + return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); + } + + // (a+b) <u a --> llvm.uadd.with.overflow. + // (a+b) <u b --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_ULT && + match(Op0, m_Add(m_Value(A), m_Value(B))) && + (Op1 == A || Op1 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) + return R; + + // a >u (a+b) --> llvm.uadd.with.overflow. + // b >u (a+b) --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op1, m_Add(m_Value(A), m_Value(B))) && + (Op0 == A || Op0 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) + return R; } - + if (I.isEquality()) { Value *A, *B, *C, *D; - - // -x == -y --> x == y - if (match(Op0, m_Neg(m_Value(A))) && - match(Op1, m_Neg(m_Value(B)))) - return new ICmpInst(I.getPredicate(), A, B); - + if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 Value *OtherVal = A == Op1 ? B : A; @@ -2080,7 +2487,7 @@ Value *Xor = Builder->CreateXor(C, NC); return new ICmpInst(I.getPredicate(), A, Xor); } - + // A^B == A^D -> B == D if (A == C) return new ICmpInst(I.getPredicate(), B, D); if (A == D) return new ICmpInst(I.getPredicate(), B, C); @@ -2088,7 +2495,7 @@ if (B == D) return new ICmpInst(I.getPredicate(), A, C); } } - + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) { // A == (A^B) -> B == 0 @@ -2097,22 +2504,11 @@ Constant::getNullValue(A->getType())); } - // (A-B) == A -> B == 0 - if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - - // A == (A-B) -> B == 0 - if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (Op0->hasOneUse() && Op1->hasOneUse() && - match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { + if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { Value *X = 0, *Y = 0, *Z = 0; - + if (A == C) { X = B; Y = D; Z = A; } else if (A == D) { @@ -2122,7 +2518,7 @@ } else if (B == D) { X = A; Y = C; Z = B; } - + if (X) { // Build (X^Y) & Z Op1 = Builder->CreateXor(X, Y); Op1 = Builder->CreateAnd(Op1, Z); @@ -2131,8 +2527,34 @@ return &I; } } + + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to + // "icmp (and X, mask), cst" + uint64_t ShAmt = 0; + ConstantInt *Cst1; + if (Op0->hasOneUse() && + match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), + m_ConstantInt(ShAmt))))) && + match(Op1, m_ConstantInt(Cst1)) && + // Only do this when A has multiple uses. This is most important to do + // when it exposes other optimizations. + !A->hasOneUse()) { + unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); + + if (ShAmt < ASize) { + APInt MaskV = + APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); + MaskV <<= ShAmt; + + APInt CmpV = Cst1->getValue().zext(ASize); + CmpV <<= ShAmt; + + Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); + return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); + } + } } - + { Value *X; ConstantInt *Cst; // icmp X+Cst, X @@ -2158,31 +2580,31 @@ Constant *RHSC) { if (!isa<ConstantFP>(RHSC)) return 0; const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); - + // Get the width of the mantissa. We don't want to hack on conversions that // might lose information from the integer, e.g. "i64 -> float" int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); if (MantissaWidth == -1) return 0; // Unknown. - + // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); - + // If this is a uitofp instruction, we need an extra bit to hold the sign. bool LHSUnsigned = isa<UIToFPInst>(LHSI); if (LHSUnsigned) ++InputSize; - + // If the conversion would lose info, don't hack on this. if ((int)InputSize > MantissaWidth) return 0; - + // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is // not a NAN (it would have been previously simplified). assert(!RHS.isNaN() && "NaN comparison not already folded!"); - + ICmpInst::Predicate Pred; switch (I.getPredicate()) { default: llvm_unreachable("Unexpected predicate!"); @@ -2215,15 +2637,15 @@ case FCmpInst::FCMP_UNO: return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } - - const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); - + + IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); + // Now we know that the APFloat is a normal number, zero or inf. - + // See if the FP constant is too large for the integer. For example, // comparing an i8 to 300.0. unsigned IntWidth = IntTy->getScalarSizeInBits(); - + if (!LHSUnsigned) { // If the RHS value is > SignedMax, fold the comparison. This handles +INF // and large values. @@ -2249,7 +2671,7 @@ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } } - + if (!LHSUnsigned) { // See if the RHS value is < SignedMin. APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); @@ -2345,7 +2767,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { bool Changed = false; - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. @@ -2355,7 +2777,7 @@ } Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); @@ -2371,7 +2793,7 @@ I.setPredicate(FCmpInst::FCMP_UNO); I.setOperand(1, Constant::getNullValue(Op0->getType())); return &I; - + case FCmpInst::FCMP_ORD: // True if ordered (no nans) case FCmpInst::FCMP_OEQ: // True if ordered and equal case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal @@ -2382,17 +2804,57 @@ return &I; } } - + // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast<Constant>(Op1)) { if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) switch (LHSI->getOpcode()) { + case Instruction::FPExt: { + // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless + FPExtInst *LHSExt = cast<FPExtInst>(LHSI); + ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC); + if (!RHSF) + break; + + // We can't convert a PPC double double. + if (RHSF->getType()->isPPC_FP128Ty()) + break; + + const fltSemantics *Sem; + // FIXME: This shouldn't be here. + if (LHSExt->getSrcTy()->isFloatTy()) + Sem = &APFloat::IEEEsingle; + else if (LHSExt->getSrcTy()->isDoubleTy()) + Sem = &APFloat::IEEEdouble; + else if (LHSExt->getSrcTy()->isFP128Ty()) + Sem = &APFloat::IEEEquad; + else if (LHSExt->getSrcTy()->isX86_FP80Ty()) + Sem = &APFloat::x87DoubleExtended; + else + break; + + bool Lossy; + APFloat F = RHSF->getValueAPF(); + F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); + + // Avoid lossy conversions and denormals. Zero is a special case + // that's OK to convert. + APFloat Fabs = F; + Fabs.clearSign(); + if (!Lossy && + ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) != + APFloat::cmpLessThan) || Fabs.isZero())) + + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + ConstantFP::get(RHSC->getContext(), F)); + break; + } case Instruction::PHI: // Only fold fcmp into the PHI if the phi and fcmp are in the same // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) + if (Instruction *NV = FoldOpIntoPhi(I)) return NV; break; case Instruction::SIToFP: @@ -2411,13 +2873,13 @@ Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); // Insert a new FCmp of the other select operand. Op2 = Builder->CreateFCmp(I.getPredicate(), - LHSI->getOperand(2), RHSC); + LHSI->getOperand(2), RHSC, I.getName()); } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { // Fold the known value into the constant operand. Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); // Insert a new FCmp of the other select operand. Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), - RHSC); + RHSC, I.getName()); } } @@ -2425,6 +2887,14 @@ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } + case Instruction::FSub: { + // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C + Value *Op; + if (match(LHSI, m_FNeg(m_Value(Op)))) + return new FCmpInst(I.getSwappedPredicate(), Op, + ConstantExpr::getFNeg(RHSC)); + break; + } case Instruction::Load: if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { @@ -2438,5 +2908,17 @@ } } + // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y + Value *X, *Y; + if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) + return new FCmpInst(I.getSwappedPredicate(), X, Y); + + // fcmp (fpext x), (fpext y) -> fcmp x, y + if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0)) + if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1)) + if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + RHSExt->getOperand(0)); + return Changed ? &I : 0; }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index ab355b1..7446a51 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -26,7 +26,7 @@ // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. if (TD) { - const Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); + Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); if (AI.getArraySize()->getType() != IntPtrTy) { Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); @@ -38,17 +38,17 @@ // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - const Type *NewTy = + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); - AllocaInst *New = Builder->CreateAlloca(NewTy, 0); + AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of // allocas if possible...also skip interleaved debug info // BasicBlock::iterator It = New; - while (isa<AllocaInst>(*It) || ISA_DEBUG_INFO_INTRINSIC(*It)) ++It; + while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... @@ -57,11 +57,13 @@ Value *Idx[2]; Idx[0] = NullIdx; Idx[1] = NullIdx; - Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, It); + Instruction *GEP = + GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub"); + InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original // allocation. - return ReplaceInstUsesWith(AI, V); + return ReplaceInstUsesWith(AI, GEP); } else if (isa<UndefValue>(AI.getArraySize())) { return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); } @@ -89,28 +91,28 @@ User *CI = cast<User>(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); - const PointerType *DestTy = cast<PointerType>(CI->getType()); - const Type *DestPTy = DestTy->getElementType(); - if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { + PointerType *DestTy = cast<PointerType>(CI->getType()); + Type *DestPTy = DestTy->getElementType(); + if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { // If the address spaces don't match, don't eliminate the cast. if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) return 0; - const Type *SrcPTy = SrcTy->getElementType(); + Type *SrcPTy = SrcTy->getElementType(); if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. - if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) + if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); Idxs[1] = Idxs[0]; - CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); + CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); } @@ -128,8 +130,9 @@ // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. LoadInst *NewLoad = - IC.Builder->CreateLoad(CastOp, LI.isVolatile()); + IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); + NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); // Now cast the result of the load. return new BitCastInst(NewLoad, LI.getType()); } @@ -144,7 +147,7 @@ // Attempt to improve the alignment. if (TD) { unsigned KnownAlign = - GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); + getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD); unsigned LoadAlign = LI.getAlignment(); unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : TD->getABITypeAlignment(LI.getType()); @@ -160,11 +163,12 @@ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; - // None of the following transforms are legal for volatile loads. - if (LI.isVolatile()) return 0; + // None of the following transforms are legal for volatile/atomic loads. + // FIXME: Some of it is okay for atomic loads; needs refactoring. + if (!LI.isSimple()) return 0; // Do really simple store-to-load forwarding and load CSE, to catch cases - // where there are several consequtive memory accesses to the same location, + // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. BasicBlock::iterator BBI = &LI; if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) @@ -219,8 +223,10 @@ unsigned Align = LI.getAlignment(); if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) && isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) { - LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1)); - LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2)); + LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); V1->setAlignment(Align); V2->setAlignment(Align); return SelectInst::Create(SI->getCondition(), V1, V2); @@ -251,11 +257,11 @@ User *CI = cast<User>(SI.getOperand(1)); Value *CastOp = CI->getOperand(0); - const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); - const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); + Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); + PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); if (SrcTy == 0) return 0; - const Type *SrcPTy = SrcTy->getElementType(); + Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; @@ -275,12 +281,12 @@ NewGEPIndices.push_back(Zero); while (1) { - if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) { + if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ break; NewGEPIndices.push_back(Zero); SrcPTy = STy->getElementType(0); - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { NewGEPIndices.push_back(Zero); SrcPTy = ATy->getElementType(); } else { @@ -309,8 +315,8 @@ Value *NewCast; Value *SIOp0 = SI.getOperand(0); Instruction::CastOps opcode = Instruction::BitCast; - const Type* CastSrcTy = SIOp0->getType(); - const Type* CastDstTy = SrcPTy; + Type* CastSrcTy = SIOp0->getType(); + Type* CastDstTy = SrcPTy; if (CastDstTy->isPointerTy()) { if (CastSrcTy->isIntegerTy()) opcode = Instruction::IntToPtr; @@ -322,11 +328,13 @@ // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) - CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), - NewGEPIndices.end()); + CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy); - return new StoreInst(NewCast, CastOp); + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, + SIOp0->getName()+".c"); + SI.setOperand(0, NewCast); + SI.setOperand(1, CastOp); + return &SI; } /// equivalentAddressValues - Test if A and B will obviously have the same @@ -362,29 +370,11 @@ Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); - // If the RHS is an alloca with a single use, zapify the store, making the - // alloca dead. - // If the RHS is an alloca with a two uses, the other one being a - // llvm.dbg.declare, zapify the store and the declare, making the - // alloca dead. We must do this to prevent declares from affecting - // codegen. - if (!SI.isVolatile()) { - if (Ptr->hasOneUse()) { - if (isa<AllocaInst>(Ptr)) - return EraseInstFromFunction(SI); - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { - if (isa<AllocaInst>(GEP->getOperand(0))) { - if (GEP->getOperand(0)->hasOneUse()) - return EraseInstFromFunction(SI); - } - } - } - } - // Attempt to improve the alignment. if (TD) { unsigned KnownAlign = - GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); + getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()), + TD); unsigned StoreAlign = SI.getAlignment(); unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : TD->getABITypeAlignment(Val->getType()); @@ -395,6 +385,23 @@ SI.setAlignment(EffectiveStoreAlign); } + // Don't hack volatile/atomic stores. + // FIXME: Some bits are legal for atomic stores; needs refactoring. + if (!SI.isSimple()) return 0; + + // If the RHS is an alloca with a single use, zapify the store, making the + // alloca dead. + if (Ptr->hasOneUse()) { + if (isa<AllocaInst>(Ptr)) + return EraseInstFromFunction(SI); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { + if (isa<AllocaInst>(GEP->getOperand(0))) { + if (GEP->getOperand(0)->hasOneUse()) + return EraseInstFromFunction(SI); + } + } + } + // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. @@ -404,7 +411,7 @@ --BBI; // Don't count debug info directives, lest they affect codegen, // and we skip pointer-to-pointer bitcasts, which are NOPs. - if (ISA_DEBUG_INFO_INTRINSIC(BBI) || + if (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; @@ -412,8 +419,8 @@ if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? - if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), - SI.getOperand(1))) { + if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), + SI.getOperand(1))) { ++NumDeadStore; ++BBI; EraseInstFromFunction(*PrevSI); @@ -427,7 +434,7 @@ // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - !SI.isVolatile()) + LI->isSimple()) return EraseInstFromFunction(SI); // Otherwise, this is a load from some other location. Stores before it @@ -439,9 +446,6 @@ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; } - - - if (SI.isVolatile()) return 0; // Don't hack volatile stores. // store X, null -> turns into 'unreachable' in SimplifyCFG if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { @@ -474,7 +478,7 @@ BBI = &SI; do { ++BBI; - } while (ISA_DEBUG_INFO_INTRINSIC(BBI) || + } while (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())); if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) if (BI->isUnconditional()) @@ -538,17 +542,17 @@ if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. - while (ISA_DEBUG_INFO_INTRINSIC(BBI) || + while (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { if (BBI==OtherBB->begin()) return false; --BBI; } - // If this isn't a store, isn't a store to the same location, or if the - // alignments differ, bail out. + // If this isn't a store, isn't a store to the same location, or is not the + // right kind of store, bail out. OtherStore = dyn_cast<StoreInst>(BBI); if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) + !SI.isSameOperationAs(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the @@ -564,7 +568,7 @@ // Check to see if we find the matching store. if ((OtherStore = dyn_cast<StoreInst>(BBI))) { if (OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) + !SI.isSameOperationAs(OtherStore)) return false; break; } @@ -588,8 +592,7 @@ // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { - PHINode *PN = PHINode::Create(MergedVal->getType()); - PN->reserveOperandSpace(2); + PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); @@ -597,11 +600,15 @@ // Advance to a place where it is safe to insert the new store and // insert it. - BBI = DestBB->getFirstNonPHI(); - InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1), - OtherStore->isVolatile(), - SI.getAlignment()), *BBI); - + BBI = DestBB->getFirstInsertionPt(); + StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), + SI.isVolatile(), + SI.getAlignment(), + SI.getOrdering(), + SI.getSynchScope()); + InsertNewInstBefore(NewSI, *BBI); + NewSI->setDebugLoc(OtherStore->getDebugLoc()); + // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore);
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index c6111f0..7f48125 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -14,26 +14,76 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; -/// SubOne - Subtract one from a ConstantInt. -static Constant *SubOne(ConstantInt *C) { - return ConstantInt::get(C->getContext(), C->getValue()-1); + +/// simplifyValueKnownNonZero - The specific integer value is used in a context +/// where it is known to be non-zero. If this allows us to simplify the +/// computation, do so and return the new operand, otherwise return null. +static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { + // If V has multiple uses, then we would have to do more analysis to determine + // if this is safe. For example, the use could be in dynamically unreached + // code. + if (!V->hasOneUse()) return 0; + + bool MadeChange = false; + + // ((1 << A) >>u B) --> (1 << (A-B)) + // Because V cannot be zero, we know that B is less than A. + Value *A = 0, *B = 0, *PowerOf2 = 0; + if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))), + m_Value(B))) && + // The "1" can be any value known to be a power of 2. + isPowerOfTwo(PowerOf2, IC.getTargetData())) { + A = IC.Builder->CreateSub(A, B); + return IC.Builder->CreateShl(PowerOf2, A); + } + + // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it + // inexact. Similarly for <<. + if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) + if (I->isLogicalShift() && + isPowerOfTwo(I->getOperand(0), IC.getTargetData())) { + // We know that this is an exact/nuw shift and that the input is a + // non-zero context as well. + if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) { + I->setOperand(0, V2); + MadeChange = true; + } + + if (I->getOpcode() == Instruction::LShr && !I->isExact()) { + I->setIsExact(); + MadeChange = true; + } + + if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) { + I->setHasNoUnsignedWrap(); + MadeChange = true; + } + } + + // TODO: Lots more we could do here: + // If V is a phi node, we can call this on each of its operands. + // "select cond, X, 0" can simplify to "X". + + return MadeChange ? V : 0; } + /// MultiplyOverflows - True if the multiply can not be expressed in an int /// this size. static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { uint32_t W = C1->getBitWidth(); APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); if (sign) { - LHSExt.sext(W * 2); - RHSExt.sext(W * 2); + LHSExt = LHSExt.sext(W * 2); + RHSExt = RHSExt.sext(W * 2); } else { - LHSExt.zext(W * 2); - RHSExt.zext(W * 2); + LHSExt = LHSExt.zext(W * 2); + RHSExt = RHSExt.zext(W * 2); } APInt MulExt = LHSExt * RHSExt; @@ -47,62 +97,71 @@ } Instruction *InstCombiner::visitMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) // undef * X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + if (Value *V = SimplifyMulInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); - // Simplify mul instructions with a constant RHS. - if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) { + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); - // ((X << C1)*C2) == (X * (C2 << C1)) - if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) - if (SI->getOpcode() == Instruction::Shl) - if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) - return BinaryOperator::CreateMul(SI->getOperand(0), - ConstantExpr::getShl(CI, ShOp)); - - if (CI->isZero()) - return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 - if (CI->equalsInt(1)) // X * 1 == X - return ReplaceInstUsesWith(I, Op0); - if (CI->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0); - - const APInt& Val = cast<ConstantInt>(CI)->getValue(); - if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C - return BinaryOperator::CreateShl(Op0, - ConstantInt::get(Op0->getType(), Val.logBase2())); + if (match(Op1, m_AllOnes())) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + + // ((X << C1)*C2) == (X * (C2 << C1)) + if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) + if (SI->getOpcode() == Instruction::Shl) + if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) + return BinaryOperator::CreateMul(SI->getOperand(0), + ConstantExpr::getShl(CI, ShOp)); + + const APInt &Val = CI->getValue(); + if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C + Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2()); + BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst); + if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap(); + if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap(); + return Shl; + } + + // Canonicalize (X+C1)*CI -> X*CI+C1*CI. + { Value *X; ConstantInt *C1; + if (Op0->hasOneUse() && + match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) { + Value *Add = Builder->CreateMul(X, CI); + return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI)); } - } else if (Op1C->getType()->isVectorTy()) { - if (Op1C->isNullValue()) - return ReplaceInstUsesWith(I, Op1C); + } - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { - if (Op1V->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0); - - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat)) - if (CI->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); + // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n + // (Y + const) * (-(2**n)) -> (-constY) * (2**n), for positive nonzero n + // The "* (2**n)" thus becomes a potential shifting opportunity. + { + const APInt & Val = CI->getValue(); + const APInt &PosVal = Val.abs(); + if (Val.isNegative() && PosVal.isPowerOf2()) { + Value *X = 0, *Y = 0; + if (Op0->hasOneUse()) { + ConstantInt *C1; + Value *Sub = 0; + if (match(Op0, m_Sub(m_Value(Y), m_Value(X)))) + Sub = Builder->CreateSub(X, Y, "suba"); + else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1)))) + Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc"); + if (Sub) + return + BinaryOperator::CreateMul(Sub, + ConstantInt::get(Y->getType(), PosVal)); } } } - - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) - if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && - isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) { - // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. - Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C); - Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); - return BinaryOperator::CreateAdd(Add, C1C2); - - } - + } + + // Simplify mul instructions with a constant RHS. + if (isa<Constant>(Op1)) { // Try to fold constant mul into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -135,8 +194,8 @@ BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); - // If the division is exact, X % Y is zero. - if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO)) + // If the division is exact, X % Y is zero, so we end up with X or -X. + if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); @@ -194,7 +253,7 @@ } Instruction *InstCombiner::visitFMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // Simplify mul instructions with a constant RHS... @@ -304,28 +363,6 @@ } -/// This function implements the transforms on div instructions that work -/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is -/// used by the visitors to those instructions. -/// @brief Transforms common to all three div instructions -Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // undef / X -> 0 for integer. - // undef / X -> undef for FP (the undef could be a snan). - if (isa<UndefValue>(Op0)) { - if (Op0->getType()->isFPOrFPVectorTy()) - return ReplaceInstUsesWith(I, Op0); - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - - // X / undef -> undef - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); - - return 0; -} - /// This function implements the transforms common to both integer division /// instructions (udiv and sdiv). It is called by the visitors to those integer /// division instructions. @@ -333,31 +370,18 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // (sdiv X, X) --> 1 (udiv X, X) --> 1 - if (Op0 == Op1) { - if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { - Constant *CI = ConstantInt::get(Ty->getElementType(), 1); - std::vector<Constant*> Elts(Ty->getNumElements(), CI); - return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); - } - - Constant *CI = ConstantInt::get(I.getType(), 1); - return ReplaceInstUsesWith(I, CI); + // The RHS is known non-zero. + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) { + I.setOperand(1, V); + return &I; } - if (Instruction *Common = commonDivTransforms(I)) - return Common; - // Handle cases involving: [su]div X, (select Cond, Y, Z) // This does not apply for fdiv. if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) return &I; if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // div X, 1 == X - if (RHS->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); - // (X / C1) / C2 -> X / (C1*C2) if (Instruction *LHS = dyn_cast<Instruction>(Op0)) if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) @@ -365,9 +389,8 @@ if (MultiplyOverflows(RHS, LHSRHS, I.getOpcode()==Instruction::SDiv)) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - else - return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), - ConstantExpr::getMul(RHS, LHSRHS)); + return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), + ConstantExpr::getMul(RHS, LHSRHS)); } if (!RHS->isZero()) { // avoid X udiv 0 @@ -380,28 +403,41 @@ } } - // 0 / X == 0, we don't need to preserve faults! - if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0)) - if (LHS->equalsInt(0)) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // See if we can fold away this div instruction. + if (SimplifyDemandedInstructionBits(I)) + return &I; - // It can't be division by zero, hence it must be division by one. - if (I.getType()->isIntegerTy(1)) - return ReplaceInstUsesWith(I, Op0); - - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { - if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue())) - // div X, 1 == X - if (X->isOne()) - return ReplaceInstUsesWith(I, Op0); + // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y + Value *X = 0, *Z = 0; + if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 + bool isSigned = I.getOpcode() == Instruction::SDiv; + if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) || + (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1))))) + return BinaryOperator::Create(I.getOpcode(), X, Op1); } return 0; } +/// dyn_castZExtVal - Checks if V is a zext or constant that can +/// be truncated to Ty without losing bits. +static Value *dyn_castZExtVal(Value *V, Type *Ty) { + if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) { + if (Z->getSrcTy() == Ty) + return Z->getOperand(0); + } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) { + if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth()) + return ConstantExpr::getTrunc(C, Ty); + } + return 0; +} + Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyUDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; @@ -410,60 +446,66 @@ // X udiv 2^C -> X >> C // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. - if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 - return BinaryOperator::CreateLShr(Op0, + if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2 + BinaryOperator *LShr = + BinaryOperator::CreateLShr(Op0, ConstantInt::get(Op0->getType(), C->getValue().logBase2())); + if (I.isExact()) LShr->setIsExact(); + return LShr; + } // X udiv C, where C >= signbit if (C->getValue().isNegative()) { - Value *IC = Builder->CreateICmpULT( Op0, C); + Value *IC = Builder->CreateICmpULT(Op0, C); return SelectInst::Create(IC, Constant::getNullValue(I.getType()), ConstantInt::get(I.getType(), 1)); } } // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) - if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) { - if (RHSI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(RHSI->getOperand(0))) { - const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue(); - if (C1.isPowerOf2()) { - Value *N = RHSI->getOperand(1); - const Type *NTy = N->getType(); - if (uint32_t C2 = C1.logBase2()) - N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2)); - return BinaryOperator::CreateLShr(Op0, N); - } + { const APInt *CI; Value *N; + if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) { + if (*CI != 1) + N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2())); + if (I.isExact()) + return BinaryOperator::CreateExactLShr(Op0, N); + return BinaryOperator::CreateLShr(Op0, N); } } // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) // where C1&C2 are powers of two. - if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) - if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { - const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); - if (TVA.isPowerOf2() && FVA.isPowerOf2()) { - // Compute the shift amounts - uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); - // Construct the "on true" case of the select - Constant *TC = ConstantInt::get(Op0->getType(), TSA); - Value *TSI = Builder->CreateLShr(Op0, TC); + { Value *Cond; const APInt *C1, *C2; + if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) { + // Construct the "on true" case of the select + Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t", + I.isExact()); - // Construct the "on false" case of the select - Constant *FC = ConstantInt::get(Op0->getType(), FSA); - Value *FSI = Builder->CreateLShr(Op0, FC); + // Construct the "on false" case of the select + Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f", + I.isExact()); + + // construct the select instruction and return it. + return SelectInst::Create(Cond, TSI, FSI); + } + } - // construct the select instruction and return it. - return SelectInst::Create(SI->getOperand(0), TSI, FSI); - } - } + // (zext A) udiv (zext B) --> zext (A udiv B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", + I.isExact()), + I.getType()); + return 0; } Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifySDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; @@ -473,20 +515,17 @@ if (RHS->isAllOnesValue()) return BinaryOperator::CreateNeg(Op0); - // sdiv X, C --> ashr X, log2(C) - if (cast<SDivOperator>(&I)->isExact() && - RHS->getValue().isNonNegative() && + // sdiv X, C --> ashr exact X, log2(C) + if (I.isExact() && RHS->getValue().isNonNegative() && RHS->getValue().isPowerOf2()) { Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), RHS->getValue().exactLogBase2()); - return BinaryOperator::CreateAShr(Op0, ShAmt); + return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName()); } // -X/C --> X/-C provided the negation doesn't overflow. if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) - if (isa<Constant>(Sub->getOperand(0)) && - cast<Constant>(Sub->getOperand(0))->isNullValue() && - Sub->hasNoSignedWrap()) + if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap()) return BinaryOperator::CreateSDiv(Sub->getOperand(1), ConstantExpr::getNeg(RHS)); } @@ -498,16 +537,15 @@ if (MaskedValueIsZero(Op0, Mask)) { if (MaskedValueIsZero(Op1, Mask)) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateUDiv(Op0, Op1); + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); } - ConstantInt *ShiftedInt; - if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && - ShiftedInt->getValue().isPowerOf2()) { + + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have // the sign bit set. - return BinaryOperator::CreateUDiv(Op0, Op1); + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); } } } @@ -516,27 +554,22 @@ } Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { - return commonDivTransforms(I); -} - -/// This function implements the transforms on rem instructions that work -/// regardless of the kind of rem instruction it is (urem, srem, or frem). It -/// is used by the visitors to those instructions. -/// @brief Transforms common to all three rem instructions -Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVectorTy()) - return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); // X % undef -> undef + if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); - // Handle cases involving: rem X, (select Cond, Y, Z) - if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) - return &I; + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + const APFloat &Op1F = Op1C->getValueAPF(); + + // If the divisor has an exact multiplicative inverse we can turn the fdiv + // into a cheaper fmul. + APFloat Reciprocal(Op1F.getSemantics()); + if (Op1F.getExactInverse(&Reciprocal)) { + ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal); + return BinaryOperator::CreateFMul(Op0, RFP); + } + } return 0; } @@ -548,22 +581,17 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Instruction *common = commonRemTransforms(I)) - return common; + // The RHS is known non-zero. + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) { + I.setOperand(1, V); + return &I; + } - // 0 % X == 0 for integer, we don't need to preserve faults! - if (Constant *LHS = dyn_cast<Constant>(Op0)) - if (LHS->isNullValue()) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; - if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // X % 0 == undef, we don't need to preserve faults! - if (RHS->equalsInt(0)) - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - - if (RHS->equalsInt(1)) // X % 1 == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - + if (isa<ConstantInt>(Op1)) { if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) { if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -585,51 +613,52 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyURemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *common = commonIRemTransforms(I)) return common; - if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // X urem C^2 -> X and C - // Check to see if this is an unsigned remainder with an exact power of 2, - // if so, convert to a bitwise and. - if (ConstantInt *C = dyn_cast<ConstantInt>(RHS)) - if (C->getValue().isPowerOf2()) - return BinaryOperator::CreateAnd(Op0, SubOne(C)); + // X urem C^2 -> X and C-1 + { const APInt *C; + if (match(Op1, m_Power2(C))) + return BinaryOperator::CreateAnd(Op0, + ConstantInt::get(I.getType(), *C-1)); } - if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) { - // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) - if (RHSI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(RHSI->getOperand(0))) { - if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) { - Constant *N1 = Constant::getAllOnesValue(I.getType()); - Value *Add = Builder->CreateAdd(RHSI, N1); - return BinaryOperator::CreateAnd(Op0, Add); - } + // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { + Constant *N1 = Constant::getAllOnesValue(I.getType()); + Value *Add = Builder->CreateAdd(Op1, N1); + return BinaryOperator::CreateAnd(Op0, Add); + } + + // urem X, (select Cond, 2^C1, 2^C2) --> + // select Cond, (and X, C1-1), (and X, C2-1) + // when C1&C2 are powers of two. + { Value *Cond; const APInt *C1, *C2; + if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) { + Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t"); + Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f"); + return SelectInst::Create(Cond, TrueAnd, FalseAnd); } } - // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2) - // where C1&C2 are powers of two. - if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) { - if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { - // STO == 0 and SFO == 0 handled above. - if ((STO->getValue().isPowerOf2()) && - (SFO->getValue().isPowerOf2())) { - Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO)); - Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO)); - return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); - } - } - } - + // (zext A) urem (zext B) --> zext (A urem B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1), + I.getType()); + return 0; } Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifySRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer rem common cases if (Instruction *Common = commonIRemTransforms(I)) return Common; @@ -650,7 +679,7 @@ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { // X srem Y -> X urem Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateURem(Op0, Op1); + return BinaryOperator::CreateURem(Op0, Op1, I.getName()); } } @@ -661,14 +690,14 @@ bool hasNegative = false; for (unsigned i = 0; !hasNegative && i != VWidth; ++i) if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) - if (RHS->getValue().isNegative()) + if (RHS->isNegative()) hasNegative = true; if (hasNegative) { std::vector<Constant *> Elts(VWidth); for (unsigned i = 0; i != VWidth; ++i) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { - if (RHS->getValue().isNegative()) + if (RHS->isNegative()) Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); else Elts[i] = RHS; @@ -688,6 +717,14 @@ } Instruction *InstCombiner::visitFRem(BinaryOperator &I) { - return commonRemTransforms(I); -} + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyFRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + + return 0; +}
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombinePHI.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombinePHI.cpp index 2f33bf3..664546c 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" @@ -27,25 +28,40 @@ Value *LHSVal = FirstInst->getOperand(0); Value *RHSVal = FirstInst->getOperand(1); - const Type *LHSType = LHSVal->getType(); - const Type *RHSType = RHSVal->getType(); + Type *LHSType = LHSVal->getType(); + Type *RHSType = RHSVal->getType(); + + bool isNUW = false, isNSW = false, isExact = false; + if (OverflowingBinaryOperator *BO = + dyn_cast<OverflowingBinaryOperator>(FirstInst)) { + isNUW = BO->hasNoUnsignedWrap(); + isNSW = BO->hasNoSignedWrap(); + } else if (PossiblyExactOperator *PEO = + dyn_cast<PossiblyExactOperator>(FirstInst)) + isExact = PEO->isExact(); // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); if (!I || I->getOpcode() != Opc || !I->hasOneUse() || // Verify type of the LHS matches so we don't fold cmp's of different - // types or GEP's with different index types. + // types. I->getOperand(0)->getType() != LHSType || I->getOperand(1)->getType() != RHSType) return 0; // If they are CmpInst instructions, check their predicates - if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) - if (cast<CmpInst>(I)->getPredicate() != - cast<CmpInst>(FirstInst)->getPredicate()) + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate()) return 0; + if (isNUW) + isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); + if (isNSW) + isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + if (isExact) + isExact = cast<PossiblyExactOperator>(I)->isExact(); + // Keep track of which operand needs a phi node. if (I->getOperand(0) != LHSVal) LHSVal = 0; if (I->getOperand(1) != RHSVal) RHSVal = 0; @@ -64,16 +80,16 @@ Value *InRHS = FirstInst->getOperand(1); PHINode *NewLHS = 0, *NewRHS = 0; if (LHSVal == 0) { - NewLHS = PHINode::Create(LHSType); - NewLHS->reserveOperandSpace(PN.getNumOperands()/2); + NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), + FirstInst->getOperand(0)->getName() + ".pn"); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewLHS, PN); LHSVal = NewLHS; } if (RHSVal == 0) { - NewRHS = PHINode::Create(RHSType); - NewRHS->reserveOperandSpace(PN.getNumOperands()/2); + NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), + FirstInst->getOperand(1)->getName() + ".pn"); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewRHS, PN); RHSVal = NewRHS; @@ -94,11 +110,21 @@ } } - if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); - CmpInst *CIOp = cast<CmpInst>(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), - LHSVal, RHSVal); + if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) { + CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + LHSVal, RHSVal); + NewCI->setDebugLoc(FirstInst->getDebugLoc()); + return NewCI; + } + + BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst); + BinaryOperator *NewBinOp = + BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); + if (isNUW) NewBinOp->setHasNoUnsignedWrap(); + if (isNSW) NewBinOp->setHasNoSignedWrap(); + if (isExact) NewBinOp->setIsExact(); + NewBinOp->setDebugLoc(FirstInst->getDebugLoc()); + return NewBinOp; } Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { @@ -115,6 +141,8 @@ // especially bad when the PHIs are in the header of a loop. bool NeededPhi = false; + bool AllInBounds = true; + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); @@ -122,6 +150,8 @@ GEP->getNumOperands() != FirstInst->getNumOperands()) return 0; + AllInBounds &= GEP->isInBounds(); + // Keep track of whether or not all GEPs are of alloca pointers. if (AllBasePointersAreAllocas && (!isa<AllocaInst>(GEP->getOperand(0)) || @@ -174,10 +204,10 @@ for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { if (FixedOperands[i]) continue; // operand doesn't need a phi. Value *FirstOp = FirstInst->getOperand(i); - PHINode *NewPN = PHINode::Create(FirstOp->getType()); + PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, + FirstOp->getName()+".pn"); InsertNewInstBefore(NewPN, PN); - NewPN->reserveOperandSpace(e); NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[i] = NewPN; FixedOperands[i] = NewPN; @@ -198,11 +228,11 @@ } Value *Base = FixedOperands[0]; - return cast<GEPOperator>(FirstInst)->isInBounds() ? - GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, - FixedOperands.end()) : - GetElementPtrInst::Create(Base, FixedOperands.begin()+1, - FixedOperands.end()); + GetElementPtrInst *NewGEP = + GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); + if (AllInBounds) NewGEP->setIsInBounds(); + NewGEP->setDebugLoc(FirstInst->getDebugLoc()); + return NewGEP; } @@ -211,7 +241,7 @@ /// obvious the value of the load is not changed from the point of the load to /// the end of the block it is in. /// -/// Finally, it is safe, but not profitable, to sink a load targetting a +/// Finally, it is safe, but not profitable, to sink a load targeting a /// non-address-taken alloca. Doing so will cause us to not promote the alloca /// to a register. static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { @@ -256,7 +286,12 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); - + + // FIXME: This is overconservative; this transform is allowed in some cases + // for atomic operations. + if (FirstLI->isAtomic()) + return 0; + // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. We currently // don't sink loads when some have their alignment specified and some don't. @@ -310,8 +345,9 @@ // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType()); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); + PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), + PN.getNumIncomingValues(), + PN.getName()+".in"); Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -342,7 +378,9 @@ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); - return new LoadInst(PhiVal, isVolatile, LoadAlignment); + LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment); + NewLI->setDebugLoc(FirstLI->getDebugLoc()); + return NewLI; } @@ -363,7 +401,8 @@ // the same type or "+42") we can pull the operation through the PHI, reducing // code size and simplifying code. Constant *ConstantOp = 0; - const Type *CastSrcTy = 0; + Type *CastSrcTy = 0; + bool isNUW = false, isNSW = false, isExact = false; if (isa<CastInst>(FirstInst)) { CastSrcTy = FirstInst->getOperand(0)->getType(); @@ -380,6 +419,14 @@ ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); if (ConstantOp == 0) return FoldPHIArgBinOpIntoPHI(PN); + + if (OverflowingBinaryOperator *BO = + dyn_cast<OverflowingBinaryOperator>(FirstInst)) { + isNUW = BO->hasNoUnsignedWrap(); + isNSW = BO->hasNoSignedWrap(); + } else if (PossiblyExactOperator *PEO = + dyn_cast<PossiblyExactOperator>(FirstInst)) + isExact = PEO->isExact(); } else { return 0; // Cannot fold this operation. } @@ -395,12 +442,20 @@ } else if (I->getOperand(1) != ConstantOp) { return 0; } + + if (isNUW) + isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); + if (isNSW) + isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + if (isExact) + isExact = cast<PossiblyExactOperator>(I)->isExact(); } // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType()); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); + PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), + PN.getNumIncomingValues(), + PN.getName()+".in"); Value *InVal = FirstInst->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -425,15 +480,27 @@ } // Insert and return the new operation. - if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) - return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType()); + if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) { + CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal, + PN.getType()); + NewCI->setDebugLoc(FirstInst->getDebugLoc()); + return NewCI; + } - if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); + if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) { + BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); + if (isNUW) BinOp->setHasNoUnsignedWrap(); + if (isNSW) BinOp->setHasNoSignedWrap(); + if (isExact) BinOp->setIsExact(); + BinOp->setDebugLoc(FirstInst->getDebugLoc()); + return BinOp; + } CmpInst *CIOp = cast<CmpInst>(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), - PhiVal, ConstantOp); + CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + PhiVal, ConstantOp); + NewCI->setDebugLoc(FirstInst->getDebugLoc()); + return NewCI; } /// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle @@ -509,7 +576,7 @@ unsigned Shift; // The amount shifted. unsigned Width; // The width extracted. - LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty) + LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} // Ctor form used by DenseMap. @@ -568,6 +635,23 @@ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { PHINode *PN = PHIsToSlice[PHIId]; + // Scan the input list of the PHI. If any input is an invoke, and if the + // input is defined in the predecessor, then we won't be split the critical + // edge which is required to insert a truncate. Because of this, we have to + // bail out. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); + if (II == 0) continue; + if (II->getParent() != PN->getIncomingBlock(i)) + continue; + + // If we have a phi, and if it's directly in the predecessor, then we have + // a critical edge where we need to put the truncate. Since we can't + // split the edge in instcombine, we have to bail out. + return 0; + } + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); @@ -621,7 +705,7 @@ unsigned PHIId = PHIUsers[UserI].PHIId; PHINode *PN = PHIsToSlice[PHIId]; unsigned Offset = PHIUsers[UserI].Shift; - const Type *Ty = PHIUsers[UserI].Inst->getType(); + Type *Ty = PHIUsers[UserI].Inst->getType(); PHINode *EltPHI; @@ -630,7 +714,8 @@ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { // Otherwise, Create the new PHI node for this user. - EltPHI = PHINode::Create(Ty, PN); + EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), + PN->getName()+".off"+Twine(Offset), PN); assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); @@ -667,8 +752,8 @@ Value *Res = InVal; if (Offset) Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), - Offset)); - Res = Builder->CreateTrunc(Res, Ty); + Offset), "extract"); + Res = Builder->CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); @@ -707,10 +792,7 @@ // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - // If LCSSA is around, don't mess with Phi nodes - if (MustPreserveLCSSA) return 0; - - if (Value *V = PN.hasConstantValue()) + if (Value *V = SimplifyInstruction(&PN, TD)) return ReplaceInstUsesWith(PN, V); // If all PHI operands are the same operation, pull them through the PHI, @@ -757,18 +839,18 @@ // quick check to see if the PHI node only contains a single non-phi value, if // so, scan to see if the phi cycle is actually equal to that value. { - unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); + unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues(); // Scan for the first non-phi operand. - while (InValNo != NumOperandVals && + while (InValNo != NumIncomingVals && isa<PHINode>(PN.getIncomingValue(InValNo))) ++InValNo; - if (InValNo != NumOperandVals) { - Value *NonPhiInVal = PN.getOperand(InValNo); + if (InValNo != NumIncomingVals) { + Value *NonPhiInVal = PN.getIncomingValue(InValNo); // Scan the rest of the operands to see if there are any conflicts, if so // there is no need to recursively scan other phis. - for (++InValNo; InValNo != NumOperandVals; ++InValNo) { + for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { Value *OpVal = PN.getIncomingValue(InValNo); if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) break; @@ -777,7 +859,7 @@ // If we scanned over all operands, then we have one unique value plus // phi values. Scan PHI nodes to see if they all merge in each other or // the value. - if (InValNo == NumOperandVals) { + if (InValNo == NumIncomingVals) { SmallPtrSet<PHINode*, 16> ValueEqualPHIs; if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) return ReplaceInstUsesWith(PN, NonPhiInVal);
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineSelect.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineSelect.cpp index d8f3f4e..91e60a4 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" using namespace llvm; using namespace PatternMatch; @@ -24,14 +25,14 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); if (SI == 0) return SPF_UNKNOWN; - + ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); if (ICI == 0) return SPF_UNKNOWN; - + LHS = ICI->getOperand(0); RHS = ICI->getOperand(1); - - // (icmp X, Y) ? X : Y + + // (icmp X, Y) ? X : Y if (SI->getTrueValue() == ICI->getOperand(0) && SI->getFalseValue() == ICI->getOperand(1)) { switch (ICI->getPredicate()) { @@ -46,8 +47,8 @@ case ICmpInst::ICMP_SLE: return SPF_SMIN; } } - - // (icmp X, Y) ? Y : X + + // (icmp X, Y) ? Y : X if (SI->getTrueValue() == ICI->getOperand(1) && SI->getFalseValue() == ICI->getOperand(0)) { switch (ICI->getPredicate()) { @@ -62,9 +63,9 @@ case ICmpInst::ICMP_SLE: return SPF_SMAX; } } - + // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) - + return SPF_UNKNOWN; } @@ -133,10 +134,9 @@ } // Fold this by inserting a select from the input values. - SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), - FI->getOperand(0)); - InsertNewInstBefore(NewSI, SI); - return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, + Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0), + FI->getOperand(0), SI.getName()+".v"); + return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, TI->getType()); } @@ -174,9 +174,8 @@ } // If we reach here, they do have operations in common. - SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT, - OtherOpF); - InsertNewInstBefore(NewSI, SI); + Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT, + OtherOpF, SI.getName()+".v"); if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) { if (MatchIsOpZero) @@ -195,7 +194,10 @@ ConstantInt *C2I = dyn_cast<ConstantInt>(C2); if (!C2I) return false; - return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne()); + if (!C1I->isZero() && !C2I->isZero()) // One side must be zero. + return false; + return C1I->isOne() || C1I->isAllOnesValue() || + C2I->isOne() || C2I->isAllOnesValue(); } /// FoldSelectIntoOp - Try fold the select into one of the operands to @@ -211,7 +213,7 @@ unsigned OpToFold = 0; if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { + } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { OpToFold = 2; } @@ -219,14 +221,20 @@ Constant *C = GetSelectFoldableConstant(TVI); Value *OOp = TVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting - // between 0 and 1. + // between 0, 1 and -1. if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { - Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); - InsertNewInstBefore(NewSel, SI); + Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C); NewSel->takeName(TVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI)) - return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI); + BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(), + FalseVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(TVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -240,7 +248,7 @@ unsigned OpToFold = 0; if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { + } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { OpToFold = 2; } @@ -248,14 +256,20 @@ Constant *C = GetSelectFoldableConstant(FVI); Value *OOp = FVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting - // between 0 and 1. + // between 0, 1 and -1. if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { - Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); - InsertNewInstBefore(NewSel, SI); + Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp); NewSel->takeName(FVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI)) - return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI); + BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(), + TrueVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(FVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -265,6 +279,64 @@ return 0; } +/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is +/// replaced with RepOp. +static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const TargetData *TD) { + // Trivial replacement. + if (V == Op) + return RepOp; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return 0; + + // If this is a binary operator, try to simplify it with the replaced op. + if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { + if (B->getOperand(0) == Op) + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD); + if (B->getOperand(1) == Op) + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD); + } + + // Same for CmpInsts. + if (CmpInst *C = dyn_cast<CmpInst>(I)) { + if (C->getOperand(0) == Op) + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD); + if (C->getOperand(1) == Op) + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD); + } + + // TODO: We could hand off more cases to instsimplify here. + + // If all operands are constant after substituting Op for RepOp then we can + // constant fold the instruction. + if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) { + // Build a list of all constant operands. + SmallVector<Constant*, 8> ConstOps; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (I->getOperand(i) == Op) + ConstOps.push_back(CRepOp); + else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i))) + ConstOps.push_back(COp); + else + break; + } + + // All operands were constants, fold it. + if (ConstOps.size() == I->getNumOperands()) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), + ConstOps, TD); + } + } + + return 0; +} + /// visitSelectInstWithICmp - Visit a SelectInst that has an /// ICmpInst as its first operand. /// @@ -278,52 +350,95 @@ Value *FalseVal = SI.getFalseValue(); // Check cases where the comparison is with a constant that - // can be adjusted to fit the min/max idiom. We may edit ICI in - // place here, so make sure the select is the only user. + // can be adjusted to fit the min/max idiom. We may move or edit ICI + // here, so make sure the select is the only user. if (ICI->hasOneUse()) if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) { + // X < MIN ? T : F --> F + if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT) + && CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) + return ReplaceInstUsesWith(SI, FalseVal); + // X > MAX ? T : F --> F + else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT) + && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) + return ReplaceInstUsesWith(SI, FalseVal); switch (Pred) { default: break; case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: { - // X < MIN ? T : F --> F - if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) - return ReplaceInstUsesWith(SI, FalseVal); - // X < C ? X : C-1 --> X > C-1 ? C-1 : X - Constant *AdjustedRHS = - ConstantInt::get(CI->getContext(), CI->getValue()-1); - if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } - break; - } + case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: { - // X > MAX ? T : F --> F - if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) - return ReplaceInstUsesWith(SI, FalseVal); + // These transformations only work for selects over integers. + IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType()); + if (!SelectTy) + break; + + Constant *AdjustedRHS; + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT) + AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1); + else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) + AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1); + // X > C ? X : C+1 --> X < C+1 ? C+1 : X - Constant *AdjustedRHS = - ConstantInt::get(CI->getContext(), CI->getValue()+1); + // X < C ? X : C-1 --> X > C-1 ? C-1 : X if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } + (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) + ; // Nothing to do here. Values match without any sign/zero extension. + + // Types do not match. Instead of calculating this with mixed types + // promote all to the larger type. This enables scalar evolution to + // analyze this expression. + else if (CmpRHS->getType()->getScalarSizeInBits() + < SelectTy->getBitWidth()) { + Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy); + + // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X + // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X + // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X + // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X + if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) && + sextRHS == FalseVal) { + CmpLHS = TrueVal; + AdjustedRHS = sextRHS; + } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) && + sextRHS == TrueVal) { + CmpLHS = FalseVal; + AdjustedRHS = sextRHS; + } else if (ICI->isUnsigned()) { + Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy); + // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X + // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X + // zext + signed compare cannot be changed: + // 0xff <s 0x00, but 0x00ff >s 0x0000 + if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) && + zextRHS == FalseVal) { + CmpLHS = TrueVal; + AdjustedRHS = zextRHS; + } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) && + zextRHS == TrueVal) { + CmpLHS = FalseVal; + AdjustedRHS = zextRHS; + } else + break; + } else + break; + } else + break; + + Pred = ICmpInst::getSwappedPredicate(Pred); + CmpRHS = AdjustedRHS; + std::swap(FalseVal, TrueVal); + ICI->setPredicate(Pred); + ICI->setOperand(0, CmpLHS); + ICI->setOperand(1, CmpRHS); + SI.setOperand(1, TrueVal); + SI.setOperand(2, FalseVal); + + // Move ICI instruction right before the select instruction. Otherwise + // the sext/zext value may be defined after the ICI instruction uses it. + ICI->moveBefore(&SI); + + Changed = true; break; } } @@ -334,7 +449,7 @@ // FIXME: Type and constness constraints could be lifted, but we have to // watch code size carefully. We should consider xor instead of // sub/add when we decide to do that. - if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { + if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { ConstantInt *C1 = NULL, *C2 = NULL; @@ -360,24 +475,39 @@ } } - if (CmpLHS == TrueVal && CmpRHS == FalseVal) { - // Transform (X == Y) ? X : Y -> Y - if (Pred == ICmpInst::ICMP_EQ) + // If we have an equality comparison then we know the value in one of the + // arms of the select. See if substituting this value into the arm and + // simplifying the result yields the same value as the other arm. + if (Pred == ICmpInst::ICMP_EQ) { + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) return ReplaceInstUsesWith(SI, FalseVal); - // Transform (X != Y) ? X : Y -> X - if (Pred == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(SI, TrueVal); - /// NOTE: if we wanted to, this is where to detect integer MIN/MAX - - } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) { - // Transform (X == Y) ? Y : X -> X - if (Pred == ICmpInst::ICMP_EQ) + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) return ReplaceInstUsesWith(SI, FalseVal); - // Transform (X != Y) ? Y : X -> Y - if (Pred == ICmpInst::ICMP_NE) + } else if (Pred == ICmpInst::ICMP_NE) { + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) return ReplaceInstUsesWith(SI, TrueVal); - /// NOTE: if we wanted to, this is where to detect integer MIN/MAX + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + return ReplaceInstUsesWith(SI, TrueVal); } + + // NOTE: if we wanted to, this is where to detect integer MIN/MAX + + if (isa<Constant>(CmpRHS)) { + if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { + // Transform (X == C) ? X : Y -> (X == C) ? C : Y + SI.setOperand(1, CmpRHS); + Changed = true; + } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) { + // Transform (X != C) ? Y : X -> (X != C) ? Y : C + SI.setOperand(2, CmpRHS); + Changed = true; + } + } + return Changed ? &SI : 0; } @@ -399,28 +529,28 @@ // can always be mapped. const Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return true; - + // If V is a PHI node defined in the same block as the condition PHI, we can // map the arguments. const PHINode *CondPHI = cast<PHINode>(SI.getCondition()); - + if (const PHINode *VP = dyn_cast<PHINode>(I)) if (VP->getParent() == CondPHI->getParent()) return true; - + // Otherwise, if the PHI and select are defined in the same block and if V is // defined in a different block, then we can transform it. if (SI.getParent() == CondPHI->getParent() && I->getParent() != CondPHI->getParent()) return true; - + // Otherwise we have a 'hard' case and we can't tell without doing more // detailed dominator based analysis, punt. return false; } /// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form: -/// SPF2(SPF1(A, B), C) +/// SPF2(SPF1(A, B), C) Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, Value *A, Value *B, @@ -431,7 +561,7 @@ // MIN(MIN(a, b), a) -> MIN(a, b) if (SPF1 == SPF2) return ReplaceInstUsesWith(Outer, Inner); - + // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || @@ -440,13 +570,81 @@ (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) return ReplaceInstUsesWith(Outer, C); } - + // TODO: MIN(MIN(A, 23), 97) return 0; } +/// foldSelectICmpAnd - If one of the constants is zero (we know they can't +/// both be) and we have an icmp instruction with zero, and we have an 'and' +/// with the non-constant value and a power of two we can turn the select +/// into a shift on the result of the 'and'. +static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, + ConstantInt *FalseVal, + InstCombiner::BuilderTy *Builder) { + const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); + if (!IC || !IC->isEquality()) + return 0; + if (!match(IC->getOperand(1), m_Zero())) + return 0; + + ConstantInt *AndRHS; + Value *LHS = IC->getOperand(0); + if (LHS->getType() != SI.getType() || + !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS)))) + return 0; + + // If both select arms are non-zero see if we have a select of the form + // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic + // for 'x ? 2^n : 0' and fix the thing up at the end. + ConstantInt *Offset = 0; + if (!TrueVal->isZero() && !FalseVal->isZero()) { + if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) + Offset = FalseVal; + else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) + Offset = TrueVal; + else + return 0; + + // Adjust TrueVal and FalseVal to the offset. + TrueVal = ConstantInt::get(Builder->getContext(), + TrueVal->getValue() - Offset->getValue()); + FalseVal = ConstantInt::get(Builder->getContext(), + FalseVal->getValue() - Offset->getValue()); + } + + // Make sure the mask in the 'and' and one of the select arms is a power of 2. + if (!AndRHS->getValue().isPowerOf2() || + (!TrueVal->getValue().isPowerOf2() && + !FalseVal->getValue().isPowerOf2())) + return 0; + + // Determine which shift is needed to transform result of the 'and' into the + // desired result. + ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal; + unsigned ValZeros = ValC->getValue().logBase2(); + unsigned AndZeros = AndRHS->getValue().logBase2(); + + Value *V = LHS; + if (ValZeros > AndZeros) + V = Builder->CreateShl(V, ValZeros - AndZeros); + else if (ValZeros < AndZeros) + V = Builder->CreateLShr(V, AndZeros - ValZeros); + + // Okay, now we know that everything is set up, we just don't know whether we + // have a icmp_ne or icmp_eq and whether the true or false val is the zero. + bool ShouldNotVal = !TrueVal->isZero(); + ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; + if (ShouldNotVal) + V = Builder->CreateXor(V, ValC); + + // Apply an offset if needed. + if (Offset) + V = Builder->CreateAdd(V, Offset); + return V; +} Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); @@ -463,8 +661,7 @@ return BinaryOperator::CreateOr(CondVal, FalseVal); } // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal), SI); + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { if (C->getZExtValue() == false) { @@ -472,11 +669,10 @@ return BinaryOperator::CreateAnd(CondVal, TrueVal); } // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal), SI); + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return BinaryOperator::CreateOr(NotCond, TrueVal); } - + // select a, b, a -> a&b // select a, a, b -> a|b if (CondVal == TrueVal) @@ -495,44 +691,21 @@ // select C, -1, 0 -> sext C to int if (FalseValC->isZero() && TrueValC->isAllOnesValue()) return new SExtInst(CondVal, SI.getType()); - + // select C, 0, 1 -> zext !C to int if (TrueValC->isZero() && FalseValC->getValue() == 1) { - Value *NotCond = Builder->CreateNot(CondVal); + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return new ZExtInst(NotCond, SI.getType()); } // select C, 0, -1 -> sext !C to int if (TrueValC->isZero() && FalseValC->isAllOnesValue()) { - Value *NotCond = Builder->CreateNot(CondVal); + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return new SExtInst(NotCond, SI.getType()); } - - if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) { - // If one of the constants is zero (we know they can't both be) and we - // have an icmp instruction with zero, and we have an 'and' with the - // non-constant value, eliminate this whole mess. This corresponds to - // cases like this: ((X & 27) ? 27 : 0) - if (TrueValC->isZero() || FalseValC->isZero()) - if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) && - cast<Constant>(IC->getOperand(1))->isNullValue()) - if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0))) - if (ICA->getOpcode() == Instruction::And && - isa<ConstantInt>(ICA->getOperand(1)) && - (ICA->getOperand(1) == TrueValC || - ICA->getOperand(1) == FalseValC) && - cast<ConstantInt>(ICA->getOperand(1))->getValue().isPowerOf2()) { - // Okay, now we know that everything is set up, we just don't - // know whether we have a icmp_ne or icmp_eq and whether the - // true or false val is the zero. - bool ShouldNotVal = !TrueValC->isZero(); - ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; - Value *V = ICA; - if (ShouldNotVal) - V = Builder->CreateXor(V, ICA->getOperand(1)); - return ReplaceInstUsesWith(SI, V); - } - } + + if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder)) + return ReplaceInstUsesWith(SI, V); } // See if we are selecting two values based on a comparison of the two values. @@ -540,7 +713,7 @@ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { // Transform (X == Y) ? X : Y -> Y if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -552,7 +725,7 @@ } // Transform (X une Y) ? X : Y -> X if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -567,7 +740,7 @@ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ // Transform (X == Y) ? Y : X -> X if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -579,7 +752,7 @@ } // Transform (X une Y) ? Y : X -> Y if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -635,23 +808,24 @@ // So at this point we know we have (Y -> OtherAddOp): // select C, (add X, Y), (sub X, Z) Value *NegVal; // Compute -Z - if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) { - NegVal = ConstantExpr::getNeg(C); + if (SI.getType()->isFPOrFPVectorTy()) { + NegVal = Builder->CreateFNeg(SubOp->getOperand(1)); } else { - NegVal = InsertNewInstBefore( - BinaryOperator::CreateNeg(SubOp->getOperand(1)), SI); + NegVal = Builder->CreateNeg(SubOp->getOperand(1)); } Value *NewTrueOp = OtherAddOp; Value *NewFalseOp = NegVal; if (AddOp != TI) std::swap(NewTrueOp, NewFalseOp); - Instruction *NewSel = - SelectInst::Create(CondVal, NewTrueOp, - NewFalseOp); + Value *NewSel = + Builder->CreateSelect(CondVal, NewTrueOp, + NewFalseOp, SI.getName() + ".p"); - NewSel = InsertNewInstBefore(NewSel, SI); - return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); + if (SI.getType()->isFPOrFPVectorTy()) + return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel); + else + return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); } } } @@ -660,7 +834,7 @@ if (SI.getType()->isIntegerTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; - + // MAX(MAX(a, b), a) -> MAX(a, b) // MIN(MIN(a, b), a) -> MIN(a, b) // MAX(MIN(a, b), a) -> a @@ -683,13 +857,26 @@ } // See if we can fold the select into a phi node if the condition is a select. - if (isa<PHINode>(SI.getCondition())) + if (isa<PHINode>(SI.getCondition())) // The true/false values have to be live in the PHI predecessor's blocks. if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) if (Instruction *NV = FoldOpIntoPhi(SI)) return NV; + if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { + if (TrueSI->getCondition() == CondVal) { + SI.setOperand(1, TrueSI->getTrueValue()); + return &SI; + } + } + if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { + if (FalseSI->getCondition() == CondVal) { + SI.setOperand(2, FalseSI->getFalseValue()); + return &SI; + } + } + if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); SI.setOperand(1, FalseVal);
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineShifts.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineShifts.cpp index 1fc73d6..6d85add 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -13,6 +13,8 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -21,25 +23,6 @@ assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // shl X, 0 == X and shr X, 0 == X - // shl 0, X == 0 and shr 0, X == 0 - if (Op1 == Constant::getNullValue(Op1->getType()) || - Op0 == Constant::getNullValue(Op0->getType())) - return ReplaceInstUsesWith(I, Op0); - - if (isa<UndefValue>(Op0)) { - if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef - return ReplaceInstUsesWith(I, Op0); - else // undef << X -> 0, undef >>u X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - if (isa<UndefValue>(Op1)) { - if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X - return ReplaceInstUsesWith(I, Op0); - else // X << undef, X >>u undef -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - // See if we can fold away this shift. if (SimplifyDemandedInstructionBits(I)) return &I; @@ -53,13 +36,295 @@ if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; + + // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2. + // Because shifts by negative values (which could occur if A were negative) + // are undefined. + Value *A; const APInt *B; + if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) { + // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't + // demand the sign bit (and many others) here?? + Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1), + Op1->getName()); + I.setOperand(1, Rem); + return &I; + } + return 0; } +/// CanEvaluateShifted - See if we can compute the specified value, but shifted +/// logically to the left or right by some number of bits. This should return +/// true if the expression can be computed for the same cost as the current +/// expression tree. This is used to eliminate extraneous shifting from things +/// like: +/// %C = shl i128 %A, 64 +/// %D = shl i128 %B, 96 +/// %E = or i128 %C, %D +/// %F = lshr i128 %E, 64 +/// where the client will ask if E can be computed shifted right by 64-bits. If +/// this succeeds, the GetShiftedValue function will be called to produce the +/// value. +static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, + InstCombiner &IC) { + // We can always evaluate constants shifted. + if (isa<Constant>(V)) + return true; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // If this is the opposite shift, we can directly reuse the input of the shift + // if the needed bits are already zero in the input. This allows us to reuse + // the value which means that we don't care if the shift has multiple uses. + // TODO: Handle opposite shift by exact value. + ConstantInt *CI = 0; + if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) || + (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) { + if (CI->getZExtValue() == NumBits) { + // TODO: Check that the input bits are already zero with MaskedValueIsZero +#if 0 + // If this is a truncate of a logical shr, we can truncate it to a smaller + // lshr iff we know that the bits we would otherwise be shifting in are + // already zeros. + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (MaskedValueIsZero(I->getOperand(0), + APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && + CI->getLimitedValue(BitWidth) < BitWidth) { + return CanEvaluateTruncated(I->getOperand(0), Ty); + } +#endif + + } + } + + // We can't mutate something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. + if (!I->hasOneUse()) return false; + + switch (I->getOpcode()) { + default: return false; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. + return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) && + CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC); + + case Instruction::Shl: { + // We can often fold the shift into shifts-by-a-constant. + CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). + if (isLeftShift) return true; + + // We can always turn shl(c)+shr(c) -> and(c2). + if (CI->getValue() == NumBits) return true; + + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + + // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't + // profitable unless we know the and'd out bits are already zero. + if (CI->getZExtValue() > NumBits) { + unsigned LowBits = TypeWidth - CI->getZExtValue(); + if (MaskedValueIsZero(I->getOperand(0), + APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) + return true; + } + + return false; + } + case Instruction::LShr: { + // We can often fold the shift into shifts-by-a-constant. + CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). + if (!isLeftShift) return true; + + // We can always turn lshr(c)+shl(c) -> and(c2). + if (CI->getValue() == NumBits) return true; + + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + + // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't + // profitable unless we know the and'd out bits are already zero. + if (CI->getZExtValue() > NumBits) { + unsigned LowBits = CI->getZExtValue() - NumBits; + if (MaskedValueIsZero(I->getOperand(0), + APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) + return true; + } + + return false; + } + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) && + CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC); + } + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC)) + return false; + return true; + } + } +} + +/// GetShiftedValue - When CanEvaluateShifted returned true for an expression, +/// this value inserts the new computation that produces the shifted value. +static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, + InstCombiner &IC) { + // We can always evaluate constants shifted. + if (Constant *C = dyn_cast<Constant>(V)) { + if (isLeftShift) + V = IC.Builder->CreateShl(C, NumBits); + else + V = IC.Builder->CreateLShr(C, NumBits); + // If we got a constantexpr back, try to simplify it with TD info. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + V = ConstantFoldConstantExpression(CE, IC.getTargetData()); + return V; + } + + Instruction *I = cast<Instruction>(V); + IC.Worklist.Add(I); + + switch (I->getOpcode()) { + default: assert(0 && "Inconsistency with CanEvaluateShifted"); + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. + I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC)); + I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); + return I; + + case Instruction::Shl: { + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); + + // We only accept shifts-by-a-constant in CanEvaluateShifted. + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). + if (isLeftShift) { + // If this is oversized composite shift, then unsigned shifts get 0. + unsigned NewShAmt = NumBits+CI->getZExtValue(); + if (NewShAmt >= TypeWidth) + return Constant::getNullValue(I->getType()); + + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); + return I; + } + + // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have + // zeros. + if (CI->getValue() == NumBits) { + APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits)); + V = IC.Builder->CreateAnd(BO->getOperand(0), + ConstantInt::get(BO->getContext(), Mask)); + if (Instruction *VI = dyn_cast<Instruction>(V)) { + VI->moveBefore(BO); + VI->takeName(BO); + } + return V; + } + + // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that + // the and won't be needed. + assert(CI->getZExtValue() > NumBits); + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); + return BO; + } + case Instruction::LShr: { + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); + // We only accept shifts-by-a-constant in CanEvaluateShifted. + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). + if (!isLeftShift) { + // If this is oversized composite shift, then unsigned shifts get 0. + unsigned NewShAmt = NumBits+CI->getZExtValue(); + if (NewShAmt >= TypeWidth) + return Constant::getNullValue(BO->getType()); + + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setIsExact(false); + return I; + } + + // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have + // zeros. + if (CI->getValue() == NumBits) { + APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits)); + V = IC.Builder->CreateAnd(I->getOperand(0), + ConstantInt::get(BO->getContext(), Mask)); + if (Instruction *VI = dyn_cast<Instruction>(V)) { + VI->moveBefore(I); + VI->takeName(I); + } + return V; + } + + // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that + // the and won't be needed. + assert(CI->getZExtValue() > NumBits); + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setIsExact(false); + return BO; + } + + case Instruction::Select: + I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); + I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC)); + return I; + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), + NumBits, isLeftShift, IC)); + return PN; + } + } +} + + + Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; - + + + // See if we can propagate this shift into the input, this covers the trivial + // cast of lshr(shl(x,c1),c2) as well as other more complex cases. + if (I.getOpcode() != Instruction::AShr && + CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { + DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" + " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); + + return ReplaceInstUsesWith(I, + GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); + } + + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); @@ -103,7 +368,7 @@ // Okay, we'll do this xform. Make the shift of shift. Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) - Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt); + Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); // For logical shifts, the truncation has the effect of making the high // part of the register be zeros. Emulate this by inserting an AND to @@ -126,7 +391,8 @@ // shift1 & 0x00FF Value *And = Builder->CreateAnd(NSh, - ConstantInt::get(I.getContext(), MaskV)); + ConstantInt::get(I.getContext(), MaskV), + TI->getName()); // Return the value truncated to the interesting size. return new TruncInst(And, I.getType()); @@ -150,9 +416,10 @@ match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1); + Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1); + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); uint32_t Op1Val = Op1->getLimitedValue(TypeBits); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); @@ -166,9 +433,11 @@ m_ConstantInt(CC))) && cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1); + Builder->CreateShl(Op0BO->getOperand(0), Op1, + Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1)); + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } } @@ -180,9 +449,10 @@ match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1); + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS); + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); uint32_t Op1Val = Op1->getLimitedValue(TypeBits); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); @@ -196,9 +466,10 @@ cast<BinaryOperator>(Op0BO->getOperand(0)) ->getOperand(0)->hasOneUse()) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1); + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1)); + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } @@ -266,7 +537,7 @@ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. - const IntegerType *Ty = cast<IntegerType>(I.getType()); + IntegerType *Ty = cast<IntegerType>(I.getType()); // Check for (X << c1) << c2 and (X >> c1) >> c2 if (I.getOpcode() == ShiftOp->getOpcode()) { @@ -282,39 +553,17 @@ ConstantInt::get(Ty, AmtSum)); } - if (ShiftOp->getOpcode() == Instruction::LShr && - I.getOpcode() == Instruction::AShr) { - if (AmtSum >= TypeBits) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. - return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); - } - - if (ShiftOp->getOpcode() == Instruction::AShr && - I.getOpcode() == Instruction::LShr) { - // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. - if (AmtSum >= TypeBits) - AmtSum = TypeBits-1; - - Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); - - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(), Mask)); - } - - // Okay, if we get here, one shift must be left, and the other shift must be - // right. See if the amounts are equal. if (ShiftAmt1 == ShiftAmt2) { // If we have ((X >>? C) << C), turn this into X & (-1 << C). - if (I.getOpcode() == Instruction::Shl) { + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),Mask)); } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). - if (I.getOpcode() == Instruction::LShr) { + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), Mask)); @@ -323,7 +572,8 @@ uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); @@ -334,7 +584,8 @@ } // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::Shl); Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); @@ -349,9 +600,8 @@ uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::LShr || - ShiftOp->getOpcode() == Instruction::AShr); + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, ConstantInt::get(Ty, ShiftDiff)); @@ -361,8 +611,8 @@ } // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { - assert(ShiftOp->getOpcode() == Instruction::Shl); + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); @@ -377,16 +627,56 @@ } Instruction *InstCombiner::visitShl(BinaryOperator &I) { - return commonShiftTransforms(I); + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), + I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + TD)) + return ReplaceInstUsesWith(I, V); + + if (Instruction *V = commonShiftTransforms(I)) + return V; + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) { + unsigned ShAmt = Op1C->getZExtValue(); + + // If the shifted-out value is known-zero, then this is a NUW shift. + if (!I.hasNoUnsignedWrap() && + MaskedValueIsZero(I.getOperand(0), + APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) { + I.setHasNoUnsignedWrap(); + return &I; + } + + // If the shifted out value is all signbits, this is a NSW shift. + if (!I.hasNoSignedWrap() && + ComputeNumSignBits(I.getOperand(0)) > ShAmt) { + I.setHasNoSignedWrap(); + return &I; + } + } + + // (C1 << A) << C2 -> (C1 << C2) << A + Constant *C1, *C2; + Value *A; + if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) && + match(I.getOperand(1), m_Constant(C2))) + return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); + + return 0; } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), + I.isExact(), TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *R = commonShiftTransforms(I)) return R; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + unsigned ShAmt = Op1C->getZExtValue(); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) { unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); // ctlz.i32(x)>>5 --> zext(x == 0) @@ -395,7 +685,7 @@ if ((II->getIntrinsicID() == Intrinsic::ctlz || II->getIntrinsicID() == Intrinsic::cttz || II->getIntrinsicID() == Intrinsic::ctpop) && - isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){ + isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) { bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop; Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0); Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS); @@ -403,29 +693,37 @@ } } + // If the shifted-out value is known-zero, then this is an exact shift. + if (!I.isExact() && + MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ + I.setIsExact(); + return &I; + } + } + return 0; } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), + I.isExact(), TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *R = commonShiftTransforms(I)) return R; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) { - // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) - if (CSI->isAllOnesValue()) - return ReplaceInstUsesWith(I, CSI); - } - + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + unsigned ShAmt = Op1C->getZExtValue(); + // If the input is a SHL by the same constant (ashr (shl X, C), C), then we // have a sign-extend idiom. Value *X; if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) { - // If the input value is known to already be sign extended enough, delete - // the extension. - if (ComputeNumSignBits(X) > Op1C->getZExtValue()) + // If the left shift is just shifting out partial signbits, delete the + // extension. + if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) return ReplaceInstUsesWith(I, X); // If the input is an extension from the shifted amount value, e.g. @@ -440,6 +738,13 @@ return new SExtInst(ZI->getOperand(0), ZI->getType()); } } + + // If the shifted-out value is known-zero, then this is an exact shift. + if (!I.isExact() && + MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ + I.setIsExact(); + return &I; + } } // See if we can turn a signed shr into an unsigned shr.
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 02609cb..5cd9a4b 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -34,7 +34,7 @@ if (!OpC) return false; // If there are no bits set that aren't demanded, nothing to do. - Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); + Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); if ((~Demanded & OpC->getValue()) == 0) return false; @@ -103,7 +103,7 @@ assert(V != 0 && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); - const Type *VTy = V->getType(); + Type *VTy = V->getType(); assert((TD || !VTy->isPointerTy()) && "SimplifyDemandedBits needs to know bit widths!"); assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && @@ -121,13 +121,13 @@ } if (isa<ConstantPointerNull>(V)) { // We know all of the bits for a constant! - KnownOne.clear(); + KnownOne.clearAllBits(); KnownZero = DemandedMask; return 0; } - KnownZero.clear(); - KnownOne.clear(); + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); if (DemandedMask == 0) { // Not demanding any bits from V. if (isa<UndefValue>(V)) return 0; @@ -311,8 +311,9 @@ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1)); - return InsertNewInstBefore(Or, *I); + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstWith(Or, *I); } // If all of the demanded bits on one side are known, and all of the set @@ -324,9 +325,8 @@ if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { Constant *AndC = Constant::getIntegerValue(VTy, ~RHSKnownOne & DemandedMask); - Instruction *And = - BinaryOperator::CreateAnd(I->getOperand(0), AndC); - return InsertNewInstBefore(And, *I); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + return InsertNewInstWith(And, *I); } } @@ -350,15 +350,13 @@ Constant *AndC = ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); - Instruction *NewAnd = - BinaryOperator::CreateAnd(I->getOperand(0), AndC); - InsertNewInstBefore(NewAnd, *I); + Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + InsertNewInstWith(NewAnd, *I); Constant *XorC = ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); - Instruction *NewXor = - BinaryOperator::CreateXor(NewAnd, XorC); - return InsertNewInstBefore(NewXor, *I); + Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); + return InsertNewInstWith(NewXor, *I); } // Output known-0 bits are known if clear or set in both the LHS & RHS. @@ -387,15 +385,15 @@ break; case Instruction::Trunc: { unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask.zext(truncBf); - KnownZero.zext(truncBf); - KnownOne.zext(truncBf); + DemandedMask = DemandedMask.zext(truncBf); + KnownZero = KnownZero.zext(truncBf); + KnownOne = KnownOne.zext(truncBf); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) return I; - DemandedMask.trunc(BitWidth); - KnownZero.trunc(BitWidth); - KnownOne.trunc(BitWidth); + DemandedMask = DemandedMask.trunc(BitWidth); + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; } @@ -403,8 +401,8 @@ if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) return 0; // vector->int or fp->int? - if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { - if (const VectorType *SrcVTy = + if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { + if (VectorType *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { if (DstVTy->getNumElements() != SrcVTy->getNumElements()) // Don't touch a bitcast between vectors of different element counts. @@ -425,15 +423,15 @@ // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask.trunc(SrcBitWidth); - KnownZero.trunc(SrcBitWidth); - KnownOne.trunc(SrcBitWidth); + DemandedMask = DemandedMask.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) return I; - DemandedMask.zext(BitWidth); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + DemandedMask = DemandedMask.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // The top bits are known to be zero. KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); @@ -450,17 +448,17 @@ // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. if ((NewBits & DemandedMask) != 0) - InputDemandedBits.set(SrcBitWidth-1); + InputDemandedBits.setBit(SrcBitWidth-1); - InputDemandedBits.trunc(SrcBitWidth); - KnownZero.trunc(SrcBitWidth); - KnownOne.trunc(SrcBitWidth); + InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero, KnownOne, Depth+1)) return I; - InputDemandedBits.zext(BitWidth); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + InputDemandedBits = InputDemandedBits.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the @@ -470,8 +468,8 @@ // convert this into a zero extension. if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { // Convert to ZExt cast - CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy); - return InsertNewInstBefore(NewCast, *I); + CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); + return InsertNewInstWith(NewCast, *I); } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set KnownOne |= NewBits; } @@ -512,8 +510,9 @@ // Turn it into OR if input bits are zero. if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1)); - return InsertNewInstBefore(Or, *I); + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstWith(Or, *I); } // We can say something about the output known-zero and known-one bits, @@ -574,8 +573,16 @@ break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); + + // If the shift is NUW/NSW, then it does demand the high bits. + ShlOperator *IOp = cast<ShlOperator>(I); + if (IOp->hasNoSignedWrap()) + DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); + else if (IOp->hasNoUnsignedWrap()) + DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -590,10 +597,16 @@ case Instruction::LShr: // For a logical shift right if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Unsigned shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (cast<LShrOperator>(I)->isExact()) + DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -615,8 +628,8 @@ if (DemandedMask == 1) { // Perform the logical shift right. Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), I->getOperand(1)); - return InsertNewInstBefore(NewVal, *I); + I->getOperand(0), I->getOperand(1), I->getName()); + return InsertNewInstWith(NewVal, *I); } // If the sign bit is the only bit demanded by this ashr, then there is no @@ -625,14 +638,20 @@ return I->getOperand(0); if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); // If any of the "high bits" are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) - DemandedMaskIn.set(BitWidth-1); + DemandedMaskIn.setBit(BitWidth-1); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (cast<AShrOperator>(I)->isExact()) + DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -653,8 +672,8 @@ (HighBits & ~DemandedMask) == HighBits) { // Perform the logical shift right. Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), SA); - return InsertNewInstBefore(NewVal, *I); + I->getOperand(0), SA, I->getName()); + return InsertNewInstWith(NewVal, *I); } else if ((KnownOne & SignBit) != 0) { // New bits are known one. KnownOne |= HighBits; } @@ -662,6 +681,10 @@ break; case Instruction::SRem: if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + // X % -1 demands all the bits because we don't want to introduce + // INT_MIN % -1 (== undef) by accident. + if (Rem->isAllOnesValue()) + break; APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { if (DemandedMask.ult(RA)) // srem won't affect demanded bits @@ -690,6 +713,18 @@ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); } } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. + if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { + APInt Mask2 = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, + Depth+1); + // If it's known zero, our sign bit is also zero. + if (LHSKnownZero.isNegative()) + KnownZero |= LHSKnownZero; + } break; case Instruction::URem: { APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); @@ -736,12 +771,16 @@ NewVal = BinaryOperator::CreateShl(II->getArgOperand(0), ConstantInt::get(I->getType(), ResultBit-InputBit)); NewVal->takeName(I); - return InsertNewInstBefore(NewVal, *I); + return InsertNewInstWith(NewVal, *I); } // TODO: Could compute known zero/one bits based on the input. break; } + case Intrinsic::x86_sse42_crc32_64_8: + case Intrinsic::x86_sse42_crc32_64_64: + KnownZero = APInt::getHighBitsSet(64, 32); + return 0; } } ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); @@ -784,17 +823,17 @@ UndefElts = 0; if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { - const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; for (unsigned i = 0; i != VWidth; ++i) if (!DemandedElts[i]) { // If not demanded, set to undef. Elts.push_back(Undef); - UndefElts.set(i); + UndefElts.setBit(i); } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef. Elts.push_back(Undef); - UndefElts.set(i); + UndefElts.setBit(i); } else { // Otherwise, defined. Elts.push_back(CV->getOperand(i)); } @@ -813,7 +852,7 @@ if (DemandedElts.isAllOnesValue()) return 0; - const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Zero = Constant::getNullValue(EltTy); Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; @@ -829,7 +868,7 @@ if (Depth == 10) return 0; - // If multiple users are using the root value, procede with + // If multiple users are using the root value, proceed with // simplification conservatively assuming that all elements // are needed. if (!V->hasOneUse()) { @@ -877,13 +916,13 @@ // Otherwise, the element inserted overwrites whatever was there, so the // input demanded set is simpler than the output set. APInt DemandedElts2 = DemandedElts; - DemandedElts2.clear(IdxNo); + DemandedElts2.clearBit(IdxNo); TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, UndefElts, Depth+1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } // The inserted element is defined. - UndefElts.clear(IdxNo); + UndefElts.clearBit(IdxNo); break; } case Instruction::ShuffleVector: { @@ -898,9 +937,9 @@ assert(MaskVal < LHSVWidth * 2 && "shufflevector mask index out of range!"); if (MaskVal < LHSVWidth) - LeftDemanded.set(MaskVal); + LeftDemanded.setBit(MaskVal); else - RightDemanded.set(MaskVal - LHSVWidth); + RightDemanded.setBit(MaskVal - LHSVWidth); } } } @@ -919,16 +958,19 @@ for (unsigned i = 0; i < VWidth; i++) { unsigned MaskVal = Shuffle->getMaskValue(i); if (MaskVal == -1u) { - UndefElts.set(i); + UndefElts.setBit(i); + } else if (!DemandedElts[i]) { + NewUndefElts = true; + UndefElts.setBit(i); } else if (MaskVal < LHSVWidth) { if (UndefElts4[MaskVal]) { NewUndefElts = true; - UndefElts.set(i); + UndefElts.setBit(i); } } else { if (UndefElts3[MaskVal - LHSVWidth]) { NewUndefElts = true; - UndefElts.set(i); + UndefElts.setBit(i); } } } @@ -950,7 +992,7 @@ } case Instruction::BitCast: { // Vector->vector casts only. - const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); + VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); if (!VTy) break; unsigned InVWidth = VTy->getNumElements(); APInt InputDemandedElts(InVWidth, 0); @@ -971,7 +1013,7 @@ Ratio = VWidth/InVWidth; for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { if (DemandedElts[OutIdx]) - InputDemandedElts.set(OutIdx/Ratio); + InputDemandedElts.setBit(OutIdx/Ratio); } } else { // Untested so far. @@ -983,7 +1025,7 @@ Ratio = InVWidth/VWidth; for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) if (DemandedElts[InIdx/Ratio]) - InputDemandedElts.set(InIdx); + InputDemandedElts.setBit(InIdx); } // div/rem demand all inputs, because they don't want divide by zero. @@ -1002,7 +1044,7 @@ // undef. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) if (UndefElts2[OutIdx/Ratio]) - UndefElts.set(OutIdx); + UndefElts.setBit(OutIdx); } else if (VWidth < InVWidth) { llvm_unreachable("Unimp"); // If there are more elements in the source than there are in the result, @@ -1011,7 +1053,7 @@ UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) if (!UndefElts2[InIdx]) // Not undef? - UndefElts.clear(InIdx/Ratio); // Clear undef bit. + UndefElts.clearBit(InIdx/Ratio); // Clear undef bit. } break; } @@ -1070,28 +1112,31 @@ Value *LHS = II->getArgOperand(0); Value *RHS = II->getArgOperand(1); // Extract the element as scalars. - LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, + LHS = InsertNewInstWith(ExtractElementInst::Create(LHS, ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); - RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS, + RHS = InsertNewInstWith(ExtractElementInst::Create(RHS, ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); switch (II->getIntrinsicID()) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_sse_sub_ss: case Intrinsic::x86_sse2_sub_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS), *II); + TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS, + II->getName()), *II); break; case Intrinsic::x86_sse_mul_ss: case Intrinsic::x86_sse2_mul_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS), *II); + TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS, + II->getName()), *II); break; } Instruction *New = InsertElementInst::Create( UndefValue::get(II->getType()), TmpV, - ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false)); - InsertNewInstBefore(New, *II); + ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false), + II->getName()); + InsertNewInstWith(New, *II); return New; } }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 32ffa55..154267c 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -18,7 +18,7 @@ /// CheapToScalarize - Return true if the value is cheaper to scalarize than it /// is to leave as a vector operation. static bool CheapToScalarize(Value *V, bool isConstant) { - if (isa<ConstantAggregateZero>(V)) + if (isa<ConstantAggregateZero>(V)) return true; if (ConstantVector *C = dyn_cast<ConstantVector>(V)) { if (isConstant) return true; @@ -31,7 +31,7 @@ } Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // Insert element gets simplified to the inserted element or is deleted if // this is constant idx extract element and its a constant idx insertelt. if (I->getOpcode() == Instruction::InsertElement && isConstant && @@ -49,26 +49,24 @@ (CheapToScalarize(CI->getOperand(0), isConstant) || CheapToScalarize(CI->getOperand(1), isConstant))) return true; - + return false; } -/// Read and decode a shufflevector mask. -/// -/// It turns undef elements into values that are larger than the number of -/// elements in the input. -static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) { +/// getShuffleMask - Read and decode a shufflevector mask. +/// Turn undef elements into negative values. +static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) { unsigned NElts = SVI->getType()->getNumElements(); if (isa<ConstantAggregateZero>(SVI->getOperand(2))) - return std::vector<unsigned>(NElts, 0); + return std::vector<int>(NElts, 0); if (isa<UndefValue>(SVI->getOperand(2))) - return std::vector<unsigned>(NElts, 2*NElts); - - std::vector<unsigned> Result; + return std::vector<int>(NElts, -1); + + std::vector<int> Result; const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2)); for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i) if (isa<UndefValue>(*i)) - Result.push_back(NElts*2); // undef -> 8 + Result.push_back(-1); // undef else Result.push_back(cast<ConstantInt>(*i)->getZExtValue()); return Result; @@ -79,46 +77,45 @@ /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); - const VectorType *PTy = cast<VectorType>(V->getType()); + VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. return UndefValue::get(PTy->getElementType()); - + if (isa<UndefValue>(V)) return UndefValue::get(PTy->getElementType()); if (isa<ConstantAggregateZero>(V)) return Constant::getNullValue(PTy->getElementType()); if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) return CP->getOperand(EltNo); - + if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. - if (!isa<ConstantInt>(III->getOperand(2))) + if (!isa<ConstantInt>(III->getOperand(2))) return 0; unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); - + // If this is an insert to the element we are looking for, return the // inserted value. - if (EltNo == IIElt) + if (EltNo == IIElt) return III->getOperand(1); - + // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. return FindScalarElement(III->getOperand(0), EltNo); } - + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { unsigned LHSWidth = - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); - unsigned InEl = getShuffleMask(SVI)[EltNo]; - if (InEl < LHSWidth) - return FindScalarElement(SVI->getOperand(0), InEl); - else if (InEl < LHSWidth*2) - return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); - else + cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); + int InEl = getShuffleMask(SVI)[EltNo]; + if (InEl < 0) return UndefValue::get(PTy->getElementType()); + if (InEl < (int)LHSWidth) + return FindScalarElement(SVI->getOperand(0), InEl); + return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); } - + // Otherwise, we don't know. return 0; } @@ -127,11 +124,11 @@ // If vector val is undef, replace extract with scalar undef. if (isa<UndefValue>(EI.getOperand(0))) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - + // If vector val is constant 0, replace extract with scalar 0. if (isa<ConstantAggregateZero>(EI.getOperand(0))) return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); - + if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) { // If vector val is constant with all elements the same, replace EI with // that element. When the elements are not identical, we cannot replace yet @@ -139,53 +136,53 @@ Constant *op0 = C->getOperand(0); for (unsigned i = 1; i != C->getNumOperands(); ++i) if (C->getOperand(i) != op0) { - op0 = 0; + op0 = 0; break; } if (op0) return ReplaceInstUsesWith(EI, op0); } - + // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { unsigned IndexVal = IdxC->getZExtValue(); unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - + // If this is extracting an invalid index, turn this into undef, to avoid // crashing the code below. if (IndexVal >= VectorWidth) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - + // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use // property. if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { APInt UndefElts(VectorWidth, 0); APInt DemandedMask(VectorWidth, 0); - DemandedMask.set(IndexVal); + DemandedMask.setBit(IndexVal); if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, UndefElts)) { EI.setOperand(0, V); return &EI; } } - + if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) return ReplaceInstUsesWith(EI, Elt); - + // If the this extractelement is directly using a bitcast from a vector of // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { - if (const VectorType *VT = + if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); } } - + if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { // Push extractelement into predecessor operation if legal and // profitable to do so @@ -193,9 +190,11 @@ if (I->hasOneUse() && CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { Value *newEI0 = - Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1)); + Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); Value *newEI1 = - Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1)); + Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); } } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { @@ -213,66 +212,75 @@ // If this is extracting an element from a shufflevector, figure out where // it came from and extract from the appropriate input element instead. if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { - unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; + int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; Value *Src; unsigned LHSWidth = - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); - - if (SrcIdx < LHSWidth) + cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); + + if (SrcIdx < 0) + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + if (SrcIdx < (int)LHSWidth) Src = SVI->getOperand(0); - else if (SrcIdx < LHSWidth*2) { + else { SrcIdx -= LHSWidth; Src = SVI->getOperand(1); - } else { - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); } + Type *Int32Ty = Type::getInt32Ty(EI.getContext()); return ExtractElementInst::Create(Src, - ConstantInt::get(Type::getInt32Ty(EI.getContext()), + ConstantInt::get(Int32Ty, SrcIdx, false)); } + } else if (CastInst *CI = dyn_cast<CastInst>(I)) { + // Canonicalize extractelement(cast) -> cast(extractelement) + // bitcasts can change the number of vector elements and they cost nothing + if (CI->hasOneUse() && EI.hasOneUse() && + (CI->getOpcode() != Instruction::BitCast)) { + Value *EE = Builder->CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); + return CastInst::Create(CI->getOpcode(), EE, EI.getType()); + } } - // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) } return 0; } /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns -/// elements from either LHS or RHS, return the shuffle mask and true. +/// elements from either LHS or RHS, return the shuffle mask and true. /// Otherwise, return false. static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, std::vector<Constant*> &Mask) { assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); - + if (isa<UndefValue>(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return true; } - + if (V == LHS) { for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); return true; } - + if (V == RHS) { for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i+NumElts)); return true; } - + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. Value *VecOp = IEI->getOperand(0); Value *ScalarOp = IEI->getOperand(1); Value *IdxOp = IEI->getOperand(2); - + if (!isa<ConstantInt>(IdxOp)) return false; unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - + if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. // Okay, we can handle this if the vector we are insertinting into is // transitively ok. @@ -280,13 +288,13 @@ // If so, update the mask to reflect the inserted undef. Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); return true; - } + } } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ if (isa<ConstantInt>(EI->getOperand(1)) && EI->getOperand(0)->getType() == V->getType()) { unsigned ExtractedIdx = cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); - + // This must be extracting from either LHS or RHS. if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { // Okay, we can handle this if the vector we are insertinting into is @@ -294,15 +302,14 @@ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted value. if (EI->getOperand(0) == LHS) { - Mask[InsertedIdx % NumElts] = + Mask[InsertedIdx % NumElts] = ConstantInt::get(Type::getInt32Ty(V->getContext()), ExtractedIdx); } else { assert(EI->getOperand(0) == RHS); - Mask[InsertedIdx % NumElts] = + Mask[InsertedIdx % NumElts] = ConstantInt::get(Type::getInt32Ty(V->getContext()), ExtractedIdx+NumElts); - } return true; } @@ -311,7 +318,7 @@ } } // TODO: Handle shufflevector here! - + return false; } @@ -320,11 +327,11 @@ /// that computes V and the LHS value of the shuffle. static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, Value *&RHS) { - assert(V->getType()->isVectorTy() && + assert(V->getType()->isVectorTy() && (RHS == 0 || V->getType() == RHS->getType()) && "Invalid shuffle!"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); - + if (isa<UndefValue>(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return V; @@ -336,25 +343,25 @@ Value *VecOp = IEI->getOperand(0); Value *ScalarOp = IEI->getOperand(1); Value *IdxOp = IEI->getOperand(2); - + if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && EI->getOperand(0)->getType() == V->getType()) { unsigned ExtractedIdx = - cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - + // Either the extracted from or inserted into vector must be RHSVec, // otherwise we'd end up with a shuffle of three inputs. if (EI->getOperand(0) == RHS || RHS == 0) { RHS = EI->getOperand(0); Value *V = CollectShuffleElements(VecOp, Mask, RHS); - Mask[InsertedIdx % NumElts] = - ConstantInt::get(Type::getInt32Ty(V->getContext()), - NumElts+ExtractedIdx); + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::getInt32Ty(V->getContext()), + NumElts+ExtractedIdx); return V; } - + if (VecOp == RHS) { Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); // Everything but the extracted element is replaced with the RHS. @@ -365,7 +372,7 @@ } return V; } - + // If this insertelement is a chain that comes from exactly these two // vectors, return the vector and the effective shuffle. if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) @@ -374,7 +381,7 @@ } } // TODO: Handle shufflevector here! - + // Otherwise, can't do anything fancy. Return an identity vector. for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); @@ -385,32 +392,32 @@ Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); Value *IdxOp = IE.getOperand(2); - + // Inserting an undef or into an undefined place, remove this. if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) ReplaceInstUsesWith(IE, VecOp); - - // If the inserted element was extracted from some other vector, and if the + + // If the inserted element was extracted from some other vector, and if the // indexes are constant, try to turn this into a shufflevector operation. if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && EI->getOperand(0)->getType() == IE.getType()) { unsigned NumVectorElts = IE.getType()->getNumElements(); unsigned ExtractedIdx = - cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - + if (ExtractedIdx >= NumVectorElts) // Out of range extract. return ReplaceInstUsesWith(IE, VecOp); - + if (InsertedIdx >= NumVectorElts) // Out of range insert. return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); - + // If we are extracting a value from a vector, then inserting it right // back into the same place, just use the input vector. if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) - return ReplaceInstUsesWith(IE, VecOp); - + return ReplaceInstUsesWith(IE, VecOp); + // If this insertelement isn't used by some other insertelement, turn it // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { @@ -419,18 +426,20 @@ Value *LHS = CollectShuffleElements(&IE, Mask, RHS); if (RHS == 0) RHS = UndefValue::get(LHS->getType()); // We now have a shuffle of LHS, RHS, Mask. - return new ShuffleVectorInst(LHS, RHS, - ConstantVector::get(Mask)); + return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask)); } } } - + unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) + if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { + if (V != &IE) + return ReplaceInstUsesWith(IE, V); return &IE; - + } + return 0; } @@ -438,40 +447,46 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); - std::vector<unsigned> Mask = getShuffleMask(&SVI); - + std::vector<int> Mask = getShuffleMask(&SVI); + bool MadeChange = false; - + // Undefined shuffle mask -> undefined value. if (isa<UndefValue>(SVI.getOperand(2))) return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); - - unsigned VWidth = Mask.size(); - unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); - + + unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); + + if (VWidth != cast<VectorType>(LHS->getType())->getNumElements()) + return 0; + APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + if (V != &SVI) + return ReplaceInstUsesWith(SVI, V); LHS = SVI.getOperand(0); RHS = SVI.getOperand(1); MadeChange = true; } - + // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). if (LHS == RHS || isa<UndefValue>(LHS)) { - if (isa<UndefValue>(LHS) && LHS == RHS) - return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); - + if (isa<UndefValue>(LHS) && LHS == RHS) { + // shuffle(undef,undef,mask) -> undef. + return ReplaceInstUsesWith(SVI, LHS); + } + // Remap any references to RHS to use LHS. std::vector<Constant*> Elts; - for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { - if (Mask[i] >= 2*e) + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); else { - if ((Mask[i] >= e && isa<UndefValue>(RHS)) || - (Mask[i] < e && isa<UndefValue>(LHS))) { - Mask[i] = 2*e; // Turn into undef. + if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || + (Mask[i] < (int)e && isa<UndefValue>(LHS))) { + Mask[i] = -1; // Turn into undef. Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); } else { Mask[i] = Mask[i] % e; // Force to LHS. @@ -487,150 +502,74 @@ RHS = SVI.getOperand(1); MadeChange = true; } - + // Analyze the shuffle, are the LHS or RHS and identity shuffles? - if (VWidth == LHSWidth) { - bool isLHSID = true, isRHSID = true; - - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] >= e*2) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == i); - - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); - } - - // Eliminate identity shuffles. - if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); - if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); - } - - // Check for a handful of important shuffle(shuffle()) combinations. - ShuffleVectorInst *LSVI = dyn_cast<ShuffleVectorInst>(LHS); - if (!LSVI) - return MadeChange ? &SVI : 0; + bool isLHSID = true, isRHSID = true; - LHS = LSVI->getOperand(0); - std::vector<unsigned> LHSMask = getShuffleMask(LSVI); - unsigned LHSInNElts = cast<VectorType>(LHS->getType())->getNumElements(); - - // If lhs is identity, propagate - bool isLHSLoExtract = true, isLHSHiExtract = true; - for (unsigned i = 0, e = LHSMask.size(); i != e; ++i) { - if (LHSMask[i] >= LHSInNElts*2) continue; // Ignore undef values; - isLHSLoExtract &= (LHSMask[i] == i); - isLHSHiExtract &= (LHSMask[i] == i+(LHSInNElts/2)); - } - if ((isLHSLoExtract || isLHSHiExtract) && - (isa<UndefValue>(RHS) || (LHSWidth == LHSInNElts))) { - std::vector<Constant*> Elts; - for (unsigned i = 0, e = VWidth; i != e; ++i) { - if (Mask[i] >= 2*LHSWidth) - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - LHSMask[Mask[i]])); - } - if (isa<UndefValue>(RHS)) - RHS = UndefValue::get(LHS->getType()); - return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Elts)); - } - - // If svi + lhs forms a full unpack, merge it. This allows llvm to emit - // efficient code for matrix transposes written with generic vector ops. - if ((LHSMask.size() == Mask.size()) && isPowerOf2_32(Mask.size()) && - (Mask.size() > 1)) { - bool isUnpackLo = true, isUnpackHi = true; - // check lhs mask for <0, u, 1, u .. >; - for (unsigned i = 0, e = LHSMask.size(); i != e; ++i) { - if (LHSMask[i] >= 2*e) continue; - isUnpackLo &= (LHSMask[i] == (i/2)); - isUnpackHi &= (LHSMask[i] == (i/2) + (e/2)); - } - for (unsigned i = 0, e = Mask.size(); i != e && (isUnpackLo || isUnpackHi); - i += 2) { - isUnpackLo &= (Mask[i] == i) && (Mask[i+1] == (i/2)+e); - isUnpackHi &= (Mask[i] == i) && (Mask[i+1] == (i/2)+e+(e/2)); - } - if (isUnpackLo || isUnpackHi) { - std::vector<Constant*> Elts; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] >= 2*e) - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - else if (Mask[i] >= e) - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - Mask[i])); - else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - LHSMask[Mask[i]])); - } - return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Elts)); - } + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) continue; // Ignore undef values. + // Is this an identity shuffle of the LHS value? + isLHSID &= (Mask[i] == (int)i); + + // Is this an identity shuffle of the RHS value? + isRHSID &= (Mask[i]-e == i); } - // If rhs is shuffle + identity, propagate. - if (ShuffleVectorInst *RSVI = dyn_cast<ShuffleVectorInst>(RHS)) { - std::vector<unsigned> RHSMask = getShuffleMask(RSVI); - unsigned RHSInNElts = - cast<VectorType>(RSVI->getOperand(0)->getType())->getNumElements(); + // Eliminate identity shuffles. + if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); + if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); - // If rhs is identity, propagate - bool isRHSLoExtract = true, isRHSHiExtract = true; - for (unsigned i = 0, e = RHSMask.size(); i != e; ++i) { - if (RHSMask[i] >= RHSInNElts*2) continue; // Ignore undef values; - isRHSLoExtract &= (RHSMask[i] == i); - isRHSHiExtract &= (RHSMask[i] == i+(RHSInNElts/2)); - } - if ((isRHSLoExtract || isRHSHiExtract) && (LHSWidth == RHSInNElts)) { - std::vector<Constant*> Elts; - for (unsigned i = 0, e = VWidth; i != e; ++i) { - if (Mask[i] >= 2*LHSWidth) - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - else if (Mask[i] < LHSWidth) - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - Mask[i])); - else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - RHSMask[Mask[i]-LHSWidth]+LHSWidth)); - } - SVI.setOperand(1, RSVI->getOperand(0)); - SVI.setOperand(2, ConstantVector::get(Elts)); - return &SVI; - } - } - - // Be extremely conservative when merging shufflevector instructions. It is - // difficult for the code generator to recognize a merged shuffle, which - // usually leads to worse code from merging a shuffle. - if (!isa<UndefValue>(RHS)) - return MadeChange ? &SVI : 0; - - // If the merged shuffle mask is one of the two input shuffle masks, which - // just removes one instruction. This should handle splat(splat) -> splat. - if (LHSMask.size() == Mask.size()) { - std::vector<unsigned> NewMask; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) - if (Mask[i] >= e) - NewMask.push_back(2*e); - else - NewMask.push_back(LHSMask[Mask[i]]); - - // If the result mask is equal to the src shuffle or this shuffle mask, - // do the replacement. - if (NewMask == LHSMask || NewMask == Mask) { - std::vector<Constant*> Elts; - for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - } else { - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - NewMask[i])); + // If the LHS is a shufflevector itself, see if we can combine it with this + // one without producing an unusual shuffle. Here we are really conservative: + // we are absolutely afraid of producing a shuffle mask not in the input + // program, because the code gen may not be smart enough to turn a merged + // shuffle into two specific shuffles: it may produce worse code. As such, + // we only merge two shuffles if the result is either a splat or one of the + // two input shuffle masks. In this case, merging the shuffles just removes + // one instruction, which we know is safe. This is good for things like + // turning: (splat(splat)) -> splat. + if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) { + if (isa<UndefValue>(RHS)) { + std::vector<int> LHSMask = getShuffleMask(LHSSVI); + + if (LHSMask.size() == Mask.size()) { + std::vector<int> NewMask; + bool isSplat = true; + int SplatElt = -1; // undef + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + int MaskElt; + if (Mask[i] < 0 || Mask[i] >= (int)e) + MaskElt = -1; // undef + else + MaskElt = LHSMask[Mask[i]]; + // Check if this could still be a splat. + if (MaskElt >= 0) { + if (SplatElt >=0 && SplatElt != MaskElt) + isSplat = false; + SplatElt = MaskElt; + } + NewMask.push_back(MaskElt); + } + + // If the result mask is equal to the src shuffle or this + // shuffle mask, do the replacement. + if (isSplat || NewMask == LHSMask || NewMask == Mask) { + std::vector<Constant*> Elts; + Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); + for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { + if (NewMask[i] < 0) { + Elts.push_back(UndefValue::get(Int32Ty)); + } else { + Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i])); + } + } + return new ShuffleVectorInst(LHSSVI->getOperand(0), + LHSSVI->getOperand(1), + ConstantVector::get(Elts)); } } - return new ShuffleVectorInst(LHS, LSVI->getOperand(1), - ConstantVector::get(Elts)); } } + return MadeChange ? &SVI : 0; }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineWorklist.h b/src/LLVM/lib/Transforms/InstCombine/InstCombineWorklist.h index 9100a85..32009c3 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -53,6 +53,7 @@ void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { assert(Worklist.empty() && "Worklist must be empty to add initial group"); Worklist.reserve(NumEntries+16); + WorklistMap.resize(NumEntries); DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); for (; NumEntries; --NumEntries) { Instruction *I = List[NumEntries-1];
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstructionCombining.cpp b/src/LLVM/lib/Transforms/InstCombine/InstructionCombining.cpp index 72f6b5c..c15b805 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -39,14 +39,18 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm-c/Initialization.h" #include <algorithm> #include <climits> using namespace llvm; @@ -56,14 +60,24 @@ STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); STATISTIC(NumSunkInst , "Number of instructions sunk"); +STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumFactor , "Number of factorizations"); +STATISTIC(NumReassoc , "Number of reassociations"); +// Initialization Routines +void llvm::initializeInstCombine(PassRegistry &Registry) { + initializeInstCombinerPass(Registry); +} + +void LLVMInitializeInstCombine(LLVMPassRegistryRef R) { + initializeInstCombine(*unwrap(R)); +} char InstCombiner::ID = 0; INITIALIZE_PASS(InstCombiner, "instcombine", - "Combine redundant instructions", false, false); + "Combine redundant instructions", false, false) void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreservedID(LCSSAID); AU.setPreservesCFG(); } @@ -71,7 +85,7 @@ /// ShouldChangeType - Return true if it is desirable to convert a computation /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. -bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { +bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { assert(From->isIntegerTy() && To->isIntegerTy()); // If we don't have TD, we don't know if the source/dest are legal. @@ -95,54 +109,374 @@ return true; } +// Return true, if No Signed Wrap should be maintained for I. +// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", +// where both B and C should be ConstantInts, results in a constant that does +// not overflow. This function only handles the Add and Sub opcodes. For +// all other opcodes, the function conservatively returns false. +static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { + OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I); + if (!OBO || !OBO->hasNoSignedWrap()) { + return false; + } -// SimplifyCommutative - This performs a few simplifications for commutative -// operators: + // We reason about Add and Sub Only. + Instruction::BinaryOps Opcode = I.getOpcode(); + if (Opcode != Instruction::Add && + Opcode != Instruction::Sub) { + return false; + } + + ConstantInt *CB = dyn_cast<ConstantInt>(B); + ConstantInt *CC = dyn_cast<ConstantInt>(C); + + if (!CB || !CC) { + return false; + } + + const APInt &BVal = CB->getValue(); + const APInt &CVal = CC->getValue(); + bool Overflow = false; + + if (Opcode == Instruction::Add) { + BVal.sadd_ov(CVal, Overflow); + } else { + BVal.ssub_ov(CVal, Overflow); + } + + return !Overflow; +} + +/// SimplifyAssociativeOrCommutative - This performs a few simplifications for +/// operators which are associative or commutative: +// +// Commutative operators: // // 1. Order operands such that they are listed from right (least complex) to // left (most complex). This puts constants before unary operators before // binary operators. // -// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) -// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) +// Associative operators: // -bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { - bool Changed = false; - if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) - Changed = !I.swapOperands(); - - if (!I.isAssociative()) return Changed; - +// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. +// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. +// +// Associative and commutative operators: +// +// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. +// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. +// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" +// if C1 and C2 are constants. +// +bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Instruction::BinaryOps Opcode = I.getOpcode(); - if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) - if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { - if (isa<Constant>(I.getOperand(1))) { - Constant *Folded = ConstantExpr::get(I.getOpcode(), - cast<Constant>(I.getOperand(1)), - cast<Constant>(Op->getOperand(1))); - I.setOperand(0, Op->getOperand(0)); - I.setOperand(1, Folded); - return true; - } - - if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1))) - if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) && - Op->hasOneUse() && Op1->hasOneUse()) { - Constant *C1 = cast<Constant>(Op->getOperand(1)); - Constant *C2 = cast<Constant>(Op1->getOperand(1)); + bool Changed = false; - // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); - Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), - Op1->getOperand(0), - &I); - Worklist.Add(New); - I.setOperand(0, New); - I.setOperand(1, Folded); - return true; + do { + // Order operands such that they are listed from right (least complex) to + // left (most complex). This puts constants before unary operators before + // binary operators. + if (I.isCommutative() && getComplexity(I.getOperand(0)) < + getComplexity(I.getOperand(1))) + Changed = !I.swapOperands(); + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0)); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)); + + if (I.isAssociative()) { + // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = I.getOperand(1); + + // Does "B op C" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) { + // It simplifies to V. Form "A op V". + I.setOperand(0, A); + I.setOperand(1, V); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + if (MaintainNoSignedWrap(I, B, C) && + (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { + // Note: this is only valid because SimplifyBinOp doesn't look at + // the operands to Op0. + I.clearSubclassOptionalData(); + I.setHasNoSignedWrap(true); + } else { + I.clearSubclassOptionalData(); + } + + Changed = true; + ++NumReassoc; + continue; } + } + + // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = I.getOperand(0); + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "A op B" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) { + // It simplifies to V. Form "V op C". + I.setOperand(0, V); + I.setOperand(1, C); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; + } + } } - return Changed; + + if (I.isAssociative() && I.isCommutative()) { + // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = I.getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + // It simplifies to V. Form "V op B". + I.setOperand(0, V); + I.setOperand(1, B); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; + } + } + + // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = I.getOperand(0); + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + // It simplifies to V. Form "B op V". + I.setOperand(0, B); + I.setOperand(1, V); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; + } + } + + // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" + // if C1 and C2 are constants. + if (Op0 && Op1 && + Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && + isa<Constant>(Op0->getOperand(1)) && + isa<Constant>(Op1->getOperand(1)) && + Op0->hasOneUse() && Op1->hasOneUse()) { + Value *A = Op0->getOperand(0); + Constant *C1 = cast<Constant>(Op0->getOperand(1)); + Value *B = Op1->getOperand(0); + Constant *C2 = cast<Constant>(Op1->getOperand(1)); + + Constant *Folded = ConstantExpr::get(Opcode, C1, C2); + BinaryOperator *New = BinaryOperator::Create(Opcode, A, B); + InsertNewInstWith(New, I); + New->takeName(Op1); + I.setOperand(0, New); + I.setOperand(1, Folded); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + + Changed = true; + continue; + } + } + + // No further simplifications. + return Changed; + } while (1); +} + +/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to +/// "(X LOp Y) ROp (X LOp Z)". +static bool LeftDistributesOverRight(Instruction::BinaryOps LOp, + Instruction::BinaryOps ROp) { + switch (LOp) { + default: + return false; + + case Instruction::And: + // And distributes over Or and Xor. + switch (ROp) { + default: + return false; + case Instruction::Or: + case Instruction::Xor: + return true; + } + + case Instruction::Mul: + // Multiplication distributes over addition and subtraction. + switch (ROp) { + default: + return false; + case Instruction::Add: + case Instruction::Sub: + return true; + } + + case Instruction::Or: + // Or distributes over And. + switch (ROp) { + default: + return false; + case Instruction::And: + return true; + } + } +} + +/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to +/// "(X ROp Z) LOp (Y ROp Z)". +static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, + Instruction::BinaryOps ROp) { + if (Instruction::isCommutative(ROp)) + return LeftDistributesOverRight(ROp, LOp); + // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", + // but this requires knowing that the addition does not overflow and other + // such subtleties. + return false; +} + +/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations +/// which some other binary operation distributes over either by factorizing +/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this +/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is +/// a win). Returns the simplified value, or null if it didn't simplify. +Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op + + // Factorization. + if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) { + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); + Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); + Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + + // Does "X op' Y" always equal "Y op' X"? + bool InnerCommutative = Instruction::isCommutative(InnerOpcode); + + // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? + if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (InnerCommutative && A == D)) { + if (A != C) + std::swap(C, D); + // Consider forming "A op' (B op D)". + // If "B op D" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD); + // If "B op D" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && Op0->hasOneUse() && Op1->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName()); + if (V) { + ++NumFactor; + V = Builder->CreateBinOp(InnerOpcode, A, V); + V->takeName(&I); + return V; + } + } + + // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? + if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (InnerCommutative && B == C)) { + if (B != D) + std::swap(C, D); + // Consider forming "(A op C) op' B". + // If "A op C" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD); + // If "A op C" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && Op0->hasOneUse() && Op1->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName()); + if (V) { + ++NumFactor; + V = Builder->CreateBinOp(InnerOpcode, V, B); + V->takeName(&I); + return V; + } + } + } + + // Expansion. + if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { + // The instruction has the form "(A op' B) op C". See if expanding it out + // to "(A op C) op' (B op C)" results in simplifications. + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; + Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + + // Do "A op C" and "B op C" both simplify? + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) { + // They do! Return "L op' R". + ++NumExpand; + // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. + if ((L == A && R == B) || + (Instruction::isCommutative(InnerOpcode) && L == B && R == A)) + return Op0; + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + return V; + // Otherwise, create a new instruction. + C = Builder->CreateBinOp(InnerOpcode, L, R); + C->takeName(&I); + return C; + } + } + + if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) { + // The instruction has the form "A op (B op' C)". See if expanding it out + // to "(A op B) op' (A op C)" results in simplifications. + Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); + Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' + + // Do "A op B" and "A op C" both simplify? + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) { + // They do! Return "L op' R". + ++NumExpand; + // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. + if ((L == B && R == C) || + (Instruction::isCommutative(InnerOpcode) && L == C && R == B)) + return Op1; + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + return V; + // Otherwise, create a new instruction. + A = Builder->CreateBinOp(InnerOpcode, L, R); + A->takeName(&I); + return A; + } + } + + return 0; } // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction @@ -184,8 +518,9 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner *IC) { - if (CastInst *CI = dyn_cast<CastInst>(&I)) + if (CastInst *CI = dyn_cast<CastInst>(&I)) { return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); + } // Figure out if the constant is the left or the right argument. bool ConstIsRHS = isa<Constant>(I.getOperand(1)); @@ -202,11 +537,14 @@ std::swap(Op0, Op1); if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) - return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1); + return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, + SO->getName()+".op"); if (ICmpInst *CI = dyn_cast<ICmpInst>(&I)) - return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1); + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); if (FCmpInst *CI = dyn_cast<FCmpInst>(&I)) - return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1); + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); llvm_unreachable("Unknown binary instruction type!"); } @@ -224,11 +562,24 @@ // Bool selects with constant operands can be folded to logical ops. if (SI->getType()->isIntegerTy(1)) return 0; + // If it's a bitcast involving vectors, make sure it has the same number of + // elements on both sides. + if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) { + VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy()); + VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); + + // Verify that either both or neither are vectors. + if ((SrcTy == NULL) != (DestTy == NULL)) return 0; + // If vectors, verify that they have the same number of elements. + if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements()) + return 0; + } + Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); - return SelectInst::Create(SI->getCondition(), SelectTrueVal, - SelectFalseVal); + return SelectInst::Create(SI->getCondition(), + SelectTrueVal, SelectFalseVal); } return 0; } @@ -238,20 +589,25 @@ /// has a PHI node as operand #0, see if we can fold the instruction into the /// PHI (which is only possible if all operands to the PHI are constants). /// -/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms -/// that would normally be unprofitable because they strongly encourage jump -/// threading. -Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, - bool AllowAggressive) { - AllowAggressive = false; +Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *PN = cast<PHINode>(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); - if (NumPHIValues == 0 || - // We normally only transform phis with a single use, unless we're trying - // hard to make jump threading happen. - (!PN->hasOneUse() && !AllowAggressive)) + if (NumPHIValues == 0) return 0; + // We normally only transform phis with a single use. However, if a PHI has + // multiple uses and they are all the same operation, we can fold *all* of the + // uses into the PHI. + if (!PN->hasOneUse()) { + // Walk the use list for the instruction, comparing them to I. + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User != &I && !I.isIdenticalTo(User)) + return 0; + } + // Otherwise, we can replace *all* users with the new PHI we form. + } // Check to see if all of the operands of the PHI are simple constants // (constantint/constantfp/undef). If there is one non-constant value, @@ -259,34 +615,48 @@ // bail out. We don't do arbitrary constant expressions here because moving // their computation can be expensive without a cost model. BasicBlock *NonConstBB = 0; - for (unsigned i = 0; i != NumPHIValues; ++i) - if (!isa<Constant>(PN->getIncomingValue(i)) || - isa<ConstantExpr>(PN->getIncomingValue(i))) { - if (NonConstBB) return 0; // More than one non-const value. - if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi. - NonConstBB = PN->getIncomingBlock(i); - - // If the incoming non-constant value is in I's block, we have an infinite - // loop. - if (NonConstBB == I.getParent()) + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InVal = PN->getIncomingValue(i); + if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal)) + continue; + + if (isa<PHINode>(InVal)) return 0; // Itself a phi. + if (NonConstBB) return 0; // More than one non-const value. + + NonConstBB = PN->getIncomingBlock(i); + + // If the InVal is an invoke at the end of the pred block, then we can't + // insert a computation after it without breaking the edge. + if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) + if (II->getParent() == NonConstBB) return 0; - } + + // If the incoming non-constant value is in I's block, we will remove one + // instruction, but insert another equivalent one, leading to infinite + // instcombine. + if (NonConstBB == I.getParent()) + return 0; + } // If there is exactly one non-constant value, we can insert a copy of the // operation in that block. However, if this is a critical edge, we would be // inserting the computation one some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. - if (NonConstBB != 0 && !AllowAggressive) { + if (NonConstBB != 0) { BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); if (!BI || !BI->isUnconditional()) return 0; } // Okay, we can do the transformation: create the new PHI node. - PHINode *NewPN = PHINode::Create(I.getType()); - NewPN->reserveOperandSpace(PN->getNumOperands()/2); + PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues()); InsertNewInstBefore(NewPN, *PN); NewPN->takeName(PN); - + + // If we are going to have to insert a new computation, do so right before the + // predecessors terminator. + if (NonConstBB) + Builder->SetInsertPoint(NonConstBB->getTerminator()); + // Next, add all of the operands to the PHI. if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { // We only currently try to fold the condition of a select when it is a phi, @@ -299,61 +669,59 @@ Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); Value *InV = 0; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, - FalseVInPred, - NonConstBB->getTerminator()); - Worklist.Add(cast<Instruction>(InV)); - } + else + InV = Builder->CreateSelect(PN->getIncomingValue(i), + TrueVInPred, FalseVInPred, "phitmp"); NewPN->addIncoming(InV, ThisBB); } + } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) { + Constant *C = cast<Constant>(I.getOperand(1)); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV = 0; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); + else if (isa<ICmpInst>(CI)) + InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); + else + InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } } else if (I.getNumOperands() == 2) { Constant *C = cast<Constant>(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV = 0; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { - if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); - else - InV = ConstantExpr::get(I.getOpcode(), InC, C); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) - InV = BinaryOperator::Create(BO->getOpcode(), - PN->getIncomingValue(i), C, - NonConstBB->getTerminator()); - else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = CmpInst::Create(CI->getOpcode(), - CI->getPredicate(), - PN->getIncomingValue(i), C, - NonConstBB->getTerminator()); - else - llvm_unreachable("Unknown binop!"); - - Worklist.Add(cast<Instruction>(InV)); - } + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + InV = ConstantExpr::get(I.getOpcode(), InC, C); + else + InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(), + PN->getIncomingValue(i), C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { CastInst *CI = cast<CastInst>(&I); - const Type *RetTy = CI->getType(); + Type *RetTy = CI->getType(); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), - I.getType(), - NonConstBB->getTerminator()); - Worklist.Add(cast<Instruction>(InV)); - } + else + InV = Builder->CreateCast(CI->getOpcode(), + PN->getIncomingValue(i), I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ) { + Instruction *User = cast<Instruction>(*UI++); + if (User == &I) continue; + ReplaceInstUsesWith(*User, NewPN); + EraseInstFromFunction(*User); + } return ReplaceInstUsesWith(I, NewPN); } @@ -361,7 +729,7 @@ /// or not there is a sequence of GEP indices into the type that will land us at /// the specified offset. If so, fill them into NewIndices and return the /// resultant element type, otherwise return null. -const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices) { if (!TD) return 0; if (!Ty->isSized()) return 0; @@ -369,7 +737,7 @@ // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -392,7 +760,7 @@ if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) return 0; - if (const StructType *STy = dyn_cast<StructType>(Ty)) { + if (StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TD->getStructLayout(STy); assert(Offset < (int64_t)SL->getSizeInBytes() && "Offset must stay within the indexed type"); @@ -403,7 +771,7 @@ Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); - } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); @@ -418,38 +786,53 @@ return Ty; } - +static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { + // If this GEP has only 0 indices, it is the same pointer as + // Src. If Src is not a trivial GEP too, don't combine + // the indices. + if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && + !Src.hasOneUse()) + return false; + return true; +} Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD)) + if (Value *V = SimplifyGEPInst(Ops, TD)) return ReplaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); - if (isa<UndefValue>(GEP.getOperand(0))) - return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); - - // Eliminate unneeded casts for indices. + // Eliminate unneeded casts for indices, and replace indices which displace + // by multiples of a zero size type with zero. if (TD) { bool MadeChange = false; - unsigned PtrSize = TD->getPointerSizeInBits(); - + Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); + gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; ++I, ++GTI) { - if (!isa<SequentialType>(*GTI)) continue; - - // If we are using a wider index than needed for this platform, shrink it - // to what we need. If narrower, sign-extend it to what we need. This - // explicit cast can make subsequent optimizations more obvious. - unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth(); - if (OpBits == PtrSize) - continue; - - *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); - MadeChange = true; + // Skip indices into struct types. + SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); + if (!SeqTy) continue; + + // If the element type has zero size then any index over it is equivalent + // to an index of zero, so replace it with zero if it is not zero already. + if (SeqTy->getElementType()->isSized() && + TD->getTypeAllocSize(SeqTy->getElementType()) == 0) + if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) { + *I = Constant::getNullValue(IntPtrTy); + MadeChange = true; + } + + if ((*I)->getType() != IntPtrTy) { + // If we are using a wider index than needed for this platform, shrink + // it to what we need. If narrower, sign-extend it to what we need. + // This explicit cast can make subsequent optimizations more obvious. + *I = Builder->CreateIntCast(*I, IntPtrTy, true); + MadeChange = true; + } } if (MadeChange) return &GEP; } @@ -459,13 +842,15 @@ // getelementptr instructions into a single instruction. // if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { + if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) + return 0; + // Note that if our source is a gep chain itself that we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. - // - if (GetElementPtrInst *SrcGEP = - dyn_cast<GetElementPtrInst>(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2) + if (GEPOperator *SrcGEP = + dyn_cast<GEPOperator>(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) return 0; // Wait until our source is folded to completion. SmallVector<Value*, 8> Indices; @@ -495,7 +880,7 @@ // normalized. if (SO1->getType() != GO1->getType()) return 0; - Sum = Builder->CreateAdd(SO1, GO1); + Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); } // Update the GEP in place if possible. @@ -517,44 +902,43 @@ if (!Indices.empty()) return (GEP.isInBounds() && Src->isInBounds()) ? - GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), - Indices.end()) : - GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), - Indices.end()); + GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices, + GEP.getName()) : + GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); } - + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - if (StrippedPtr != PtrOp) { - const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + if (StrippedPtr != PtrOp && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { bool HasZeroPointerIndex = false; if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1))) HasZeroPointerIndex = C->isZero(); - + // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... // into : GEP [10 x i8]* X, i32 0, ... // // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... // into : GEP i8* X, ... - // + // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { - const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); - if (const ArrayType *CATy = + PointerType *CPTy = cast<PointerType>(PtrOp->getType()); + if (ArrayType *CATy = dyn_cast<ArrayType>(CPTy->getElementType())) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end()); GetElementPtrInst *Res = - GetElementPtrInst::Create(StrippedPtr, Idx.begin(), - Idx.end()); + GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); return Res; } - if (const ArrayType *XATy = + if (ArrayType *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { @@ -572,8 +956,8 @@ // Transform things like: // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast - const Type *SrcElTy = StrippedPtrTy->getElementType(); - const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); + Type *SrcElTy = StrippedPtrTy->getElementType(); + Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { @@ -581,8 +965,8 @@ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); Idx[1] = GEP.getOperand(1); Value *NewGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2) : - Builder->CreateGEP(StrippedPtr, Idx, Idx + 2); + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return new BitCastInst(NewGEP, GEP.getType()); } @@ -632,7 +1016,7 @@ if (Scale->getZExtValue() != 1) { Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), false /*ZExt*/); - NewIdx = Builder->CreateMul(NewIdx, C); + NewIdx = Builder->CreateMul(NewIdx, C, "idxscale"); } // Insert the new GEP instruction. @@ -640,15 +1024,15 @@ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); Idx[1] = NewIdx; Value *NewGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2): - Builder->CreateGEP(StrippedPtr, Idx, Idx + 2); + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()): + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return new BitCastInst(NewGEP, GEP.getType()); } } } } - + /// See if we can simplify: /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> @@ -656,18 +1040,21 @@ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { if (TD && - !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { + !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { + // Determine how much the GEP moves the pointer. We are guaranteed to get // a constant back from EmitGEPOffset. ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP)); int64_t Offset = OffsetV->getSExtValue(); - + // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. if (Offset == 0) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. - if (isa<AllocaInst>(BCI->getOperand(0))) { + if (isa<AllocaInst>(BCI->getOperand(0)) || + isMalloc(BCI->getOperand(0))) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -685,14 +1072,12 @@ // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector<Value*, 8> NewIndices; - const Type *InTy = + Type *InTy = cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); if (FindElementAtOffset(InTy, Offset, NewIndices)) { Value *NGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()) : - Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()); + Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : + Builder->CreateGEP(BCI->getOperand(0), NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -707,18 +1092,95 @@ -static bool IsOnlyNullComparedAndFreed(const Value &V) { - for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end(); +static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl<WeakVH> &Users, + int Depth = 0) { + if (Depth == 8) + return false; + + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { - const User *U = *UI; - if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U)) - if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) + User *U = *UI; + if (isFreeCall(U)) { + Users.push_back(U); + continue; + } + if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { + if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) { + Users.push_back(ICI); continue; + } + } + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) { + Users.push_back(BCI); + continue; + } + } + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) { + Users.push_back(GEPI); + continue; + } + } + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + Users.push_back(II); + continue; + } + } return false; } return true; } +Instruction *InstCombiner::visitMalloc(Instruction &MI) { + // If we have a malloc call which is only used in any amount of comparisons + // to null and free calls, delete the calls and replace the comparisons with + // true or false as appropriate. + SmallVector<WeakVH, 64> Users; + if (IsOnlyNullComparedAndFreed(&MI, Users)) { + for (unsigned i = 0, e = Users.size(); i != e; ++i) { + Instruction *I = cast_or_null<Instruction>(&*Users[i]); + if (!I) continue; + + if (ICmpInst *C = dyn_cast<ICmpInst>(I)) { + ReplaceInstUsesWith(*C, + ConstantInt::get(Type::getInt1Ty(C->getContext()), + C->isFalseWhenEqual())); + } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { + ReplaceInstUsesWith(*I, UndefValue::get(I->getType())); + } + EraseInstFromFunction(*I); + } + return EraseInstFromFunction(MI); + } + return 0; +} + + + +Instruction *InstCombiner::visitFree(CallInst &FI) { + Value *Op = FI.getArgOperand(0); + + // free undef -> unreachable. + if (isa<UndefValue>(Op)) { + // Insert a new store to null because we cannot modify the CFG here. + Builder->CreateStore(ConstantInt::getTrue(FI.getContext()), + UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); + return EraseInstFromFunction(FI); + } + + // If we have 'free null' delete the instruction. This can happen in stl code + // when lots of inlining happens. + if (isa<ConstantPointerNull>(Op)) + return EraseInstFromFunction(FI); + + return 0; +} + + + Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Change br (not X), label True, label False to: br X, label False, True Value *X = 0; @@ -728,8 +1190,7 @@ !isa<Constant>(X)) { // Swap Destinations and condition... BI.setCondition(X); - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); return &BI; } @@ -744,8 +1205,7 @@ Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); // Swap Destinations and condition. - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); Worklist.Add(Cond); return &BI; } @@ -761,8 +1221,7 @@ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition()); Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); // Swap Destinations and condition. - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); Worklist.Add(Cond); return &BI; } @@ -776,11 +1235,17 @@ if (I->getOpcode() == Instruction::Add) if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { // change 'switch (X+4) case 1:' into 'switch (X) case -3' - for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) - SI.setOperand(i, - ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)), - AddRHS)); - SI.setOperand(0, I->getOperand(0)); + unsigned NumCases = SI.getNumCases(); + // Skip the first item since that's the default case. + for (unsigned i = 1; i < NumCases; ++i) { + ConstantInt* CaseVal = SI.getCaseValue(i); + Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal), + AddRHS); + assert(isa<ConstantInt>(NewCaseVal) && + "Result of expression should be constant"); + SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal)); + } + SI.setCondition(I->getOperand(0)); Worklist.Add(I); return &SI; } @@ -807,7 +1272,7 @@ if (EV.getNumIndices() > 1) // Extract the remaining indices out of the constant indexed by the // first index - return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end()); + return ExtractValueInst::Create(V, EV.getIndices().slice(1)); else return ReplaceInstUsesWith(EV, V); } @@ -830,7 +1295,7 @@ // with // %E = extractvalue { i32, { i32 } } %A, 0 return ExtractValueInst::Create(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); + EV.getIndices()); } if (exti == exte && insi == inse) // Both iterators are at the end: Index lists are identical. Replace @@ -848,9 +1313,9 @@ // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); + EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), - insi, inse); + makeArrayRef(insi, inse)); } if (insi == inse) // The insert list is a prefix of the extract list @@ -862,7 +1327,7 @@ // with // %E extractvalue { i32 } { i32 42 }, 0 return ExtractValueInst::Create(IV->getInsertedValueOperand(), - exti, exte); + makeArrayRef(exti, exte)); } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { // We're extracting from an intrinsic, see if we're the only user, which @@ -877,16 +1342,24 @@ case Intrinsic::sadd_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - II->replaceAllUsesWith(UndefValue::get(II->getType())); + ReplaceInstUsesWith(*II, UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateAdd(LHS, RHS); } + + // If the normal result of the add is dead, and the RHS is a constant, + // we can transform this into a range comparison. + // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 + if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow) + if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1))) + return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0), + ConstantExpr::getNot(CI)); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - II->replaceAllUsesWith(UndefValue::get(II->getType())); + ReplaceInstUsesWith(*II, UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateSub(LHS, RHS); } @@ -895,7 +1368,7 @@ case Intrinsic::smul_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - II->replaceAllUsesWith(UndefValue::get(II->getType())); + ReplaceInstUsesWith(*II, UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateMul(LHS, RHS); } @@ -905,10 +1378,372 @@ } } } - // Can't simplify extracts from other values. Note that nested extracts are - // already simplified implicitely by the above (extract ( extract (insert) ) + if (LoadInst *L = dyn_cast<LoadInst>(Agg)) + // If the (non-volatile) load only has one use, we can rewrite this to a + // load from a GEP. This reduces the size of the load. + // FIXME: If a load is used only by extractvalue instructions then this + // could be done regardless of having multiple uses. + if (L->isSimple() && L->hasOneUse()) { + // extractvalue has integer indices, getelementptr has Value*s. Convert. + SmallVector<Value*, 4> Indices; + // Prefix an i32 0 since we need the first element. + Indices.push_back(Builder->getInt32(0)); + for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); + I != E; ++I) + Indices.push_back(Builder->getInt32(*I)); + + // We need to insert these at the location of the old load, not at that of + // the extractvalue. + Builder->SetInsertPoint(L->getParent(), L); + Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices); + // Returning the load directly will cause the main loop to insert it in + // the wrong spot, so use ReplaceInstUsesWith(). + return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); + } + // We could simplify extracts from other values. Note that nested extracts may + // already be simplified implicitly by the above: extract (extract (insert) ) // will be translated into extract ( insert ( extract ) ) first and then just - // the value inserted, if appropriate). + // the value inserted, if appropriate. Similarly for extracts from single-use + // loads: extract (extract (load)) will be translated to extract (load (gep)) + // and if again single-use then via load (gep (gep)) to load (gep). + // However, double extracts from e.g. function arguments or return values + // aren't handled yet. + return 0; +} + +enum Personality_Type { + Unknown_Personality, + GNU_Ada_Personality, + GNU_CXX_Personality +}; + +/// RecognizePersonality - See if the given exception handling personality +/// function is one that we understand. If so, return a description of it; +/// otherwise return Unknown_Personality. +static Personality_Type RecognizePersonality(Value *Pers) { + Function *F = dyn_cast<Function>(Pers->stripPointerCasts()); + if (!F) + return Unknown_Personality; + return StringSwitch<Personality_Type>(F->getName()) + .Case("__gnat_eh_personality", GNU_Ada_Personality) + .Case("__gxx_personality_v0", GNU_CXX_Personality) + .Default(Unknown_Personality); +} + +/// isCatchAll - Return 'true' if the given typeinfo will match anything. +static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) { + switch (Personality) { + case Unknown_Personality: + return false; + case GNU_Ada_Personality: + // While __gnat_all_others_value will match any Ada exception, it doesn't + // match foreign exceptions (or didn't, before gcc-4.7). + return false; + case GNU_CXX_Personality: + return TypeInfo->isNullValue(); + } + llvm_unreachable("Unknown personality!"); +} + +static bool shorter_filter(const Value *LHS, const Value *RHS) { + return + cast<ArrayType>(LHS->getType())->getNumElements() + < + cast<ArrayType>(RHS->getType())->getNumElements(); +} + +Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { + // The logic here should be correct for any real-world personality function. + // However if that turns out not to be true, the offending logic can always + // be conditioned on the personality function, like the catch-all logic is. + Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn()); + + // Simplify the list of clauses, eg by removing repeated catch clauses + // (these are often created by inlining). + bool MakeNewInstruction = false; // If true, recreate using the following: + SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction; + bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. + + SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. + for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { + bool isLastClause = i + 1 == e; + if (LI.isCatch(i)) { + // A catch clause. + Value *CatchClause = LI.getClause(i); + Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts()); + + // If we already saw this clause, there is no point in having a second + // copy of it. + if (AlreadyCaught.insert(TypeInfo)) { + // This catch clause was not already seen. + NewClauses.push_back(CatchClause); + } else { + // Repeated catch clause - drop the redundant copy. + MakeNewInstruction = true; + } + + // If this is a catch-all then there is no point in keeping any following + // clauses or marking the landingpad as having a cleanup. + if (isCatchAll(Personality, TypeInfo)) { + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + } else { + // A filter clause. If any of the filter elements were already caught + // then they can be dropped from the filter. It is tempting to try to + // exploit the filter further by saying that any typeinfo that does not + // occur in the filter can't be caught later (and thus can be dropped). + // However this would be wrong, since typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some + // class derived from it). + assert(LI.isFilter(i) && "Unsupported landingpad clause!"); + Value *FilterClause = LI.getClause(i); + ArrayType *FilterType = cast<ArrayType>(FilterClause->getType()); + unsigned NumTypeInfos = FilterType->getNumElements(); + + // An empty filter catches everything, so there is no point in keeping any + // following clauses or marking the landingpad as having a cleanup. By + // dealing with this case here the following code is made a bit simpler. + if (!NumTypeInfos) { + NewClauses.push_back(FilterClause); + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + + bool MakeNewFilter = false; // If true, make a new filter. + SmallVector<Constant *, 16> NewFilterElts; // New elements. + if (isa<ConstantAggregateZero>(FilterClause)) { + // Not an empty filter - it contains at least one null typeinfo. + assert(NumTypeInfos > 0 && "Should have handled empty filter already!"); + Constant *TypeInfo = + Constant::getNullValue(FilterType->getElementType()); + // If this typeinfo is a catch-all then the filter can never match. + if (isCatchAll(Personality, TypeInfo)) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // There is no point in having multiple copies of this typeinfo, so + // discard all but the first copy if there is more than one. + NewFilterElts.push_back(TypeInfo); + if (NumTypeInfos > 1) + MakeNewFilter = true; + } else { + ConstantArray *Filter = cast<ConstantArray>(FilterClause); + SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. + NewFilterElts.reserve(NumTypeInfos); + + // Remove any filter elements that were already caught or that already + // occurred in the filter. While there, see if any of the elements are + // catch-alls. If so, the filter can be discarded. + bool SawCatchAll = false; + for (unsigned j = 0; j != NumTypeInfos; ++j) { + Value *Elt = Filter->getOperand(j); + Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts()); + if (isCatchAll(Personality, TypeInfo)) { + // This element is a catch-all. Bail out, noting this fact. + SawCatchAll = true; + break; + } + if (AlreadyCaught.count(TypeInfo)) + // Already caught by an earlier clause, so having it in the filter + // is pointless. + continue; + // There is no point in having multiple copies of the same typeinfo in + // a filter, so only add it if we didn't already. + if (SeenInFilter.insert(TypeInfo)) + NewFilterElts.push_back(cast<Constant>(Elt)); + } + // A filter containing a catch-all cannot match anything by definition. + if (SawCatchAll) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // If we dropped something from the filter, make a new one. + if (NewFilterElts.size() < NumTypeInfos) + MakeNewFilter = true; + } + if (MakeNewFilter) { + FilterType = ArrayType::get(FilterType->getElementType(), + NewFilterElts.size()); + FilterClause = ConstantArray::get(FilterType, NewFilterElts); + MakeNewInstruction = true; + } + + NewClauses.push_back(FilterClause); + + // If the new filter is empty then it will catch everything so there is + // no point in keeping any following clauses or marking the landingpad + // as having a cleanup. The case of the original filter being empty was + // already handled above. + if (MakeNewFilter && !NewFilterElts.size()) { + assert(MakeNewInstruction && "New filter but not a new instruction!"); + CleanupFlag = false; + break; + } + } + } + + // If several filters occur in a row then reorder them so that the shortest + // filters come first (those with the smallest number of elements). This is + // advantageous because shorter filters are more likely to match, speeding up + // unwinding, but mostly because it increases the effectiveness of the other + // filter optimizations below. + for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { + unsigned j; + // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. + for (j = i; j != e; ++j) + if (!isa<ArrayType>(NewClauses[j]->getType())) + break; + + // Check whether the filters are already sorted by length. We need to know + // if sorting them is actually going to do anything so that we only make a + // new landingpad instruction if it does. + for (unsigned k = i; k + 1 < j; ++k) + if (shorter_filter(NewClauses[k+1], NewClauses[k])) { + // Not sorted, so sort the filters now. Doing an unstable sort would be + // correct too but reordering filters pointlessly might confuse users. + std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j, + shorter_filter); + MakeNewInstruction = true; + break; + } + + // Look for the next batch of filters. + i = j + 1; + } + + // If typeinfos matched if and only if equal, then the elements of a filter L + // that occurs later than a filter F could be replaced by the intersection of + // the elements of F and L. In reality two typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some class + // derived from it) so it would be wrong to perform this transform in general. + // However the transform is correct and useful if F is a subset of L. In that + // case L can be replaced by F, and thus removed altogether since repeating a + // filter is pointless. So here we look at all pairs of filters F and L where + // L follows F in the list of clauses, and remove L if every element of F is + // an element of L. This can occur when inlining C++ functions with exception + // specifications. + for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { + // Examine each filter in turn. + Value *Filter = NewClauses[i]; + ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType()); + if (!FTy) + // Not a filter - skip it. + continue; + unsigned FElts = FTy->getNumElements(); + // Examine each filter following this one. Doing this backwards means that + // we don't have to worry about filters disappearing under us when removed. + for (unsigned j = NewClauses.size() - 1; j != i; --j) { + Value *LFilter = NewClauses[j]; + ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType()); + if (!LTy) + // Not a filter - skip it. + continue; + // If Filter is a subset of LFilter, i.e. every element of Filter is also + // an element of LFilter, then discard LFilter. + SmallVector<Value *, 16>::iterator J = NewClauses.begin() + j; + // If Filter is empty then it is a subset of LFilter. + if (!FElts) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + // Move on to the next filter. + continue; + } + unsigned LElts = LTy->getNumElements(); + // If Filter is longer than LFilter then it cannot be a subset of it. + if (FElts > LElts) + // Move on to the next filter. + continue; + // At this point we know that LFilter has at least one element. + if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros. + // Filter is a subset of LFilter iff Filter contains only zeros (as we + // already know that Filter is not longer than LFilter). + if (isa<ConstantAggregateZero>(Filter)) { + assert(FElts <= LElts && "Should have handled this case earlier!"); + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + continue; + } + ConstantArray *LArray = cast<ConstantArray>(LFilter); + if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros. + // Since Filter is non-empty and contains only zeros, it is a subset of + // LFilter iff LFilter contains a zero. + assert(FElts > 0 && "Should have eliminated the empty filter earlier!"); + for (unsigned l = 0; l != LElts; ++l) + if (LArray->getOperand(l)->isNullValue()) { + // LFilter contains a zero - discard it. + NewClauses.erase(J); + MakeNewInstruction = true; + break; + } + // Move on to the next filter. + continue; + } + // At this point we know that both filters are ConstantArrays. Loop over + // operands to see whether every element of Filter is also an element of + // LFilter. Since filters tend to be short this is probably faster than + // using a method that scales nicely. + ConstantArray *FArray = cast<ConstantArray>(Filter); + bool AllFound = true; + for (unsigned f = 0; f != FElts; ++f) { + Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts(); + AllFound = false; + for (unsigned l = 0; l != LElts; ++l) { + Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts(); + if (LTypeInfo == FTypeInfo) { + AllFound = true; + break; + } + } + if (!AllFound) + break; + } + if (AllFound) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + } + } + + // If we changed any of the clauses, replace the old landingpad instruction + // with a new one. + if (MakeNewInstruction) { + LandingPadInst *NLI = LandingPadInst::Create(LI.getType(), + LI.getPersonalityFn(), + NewClauses.size()); + for (unsigned i = 0, e = NewClauses.size(); i != e; ++i) + NLI->addClause(NewClauses[i]); + // A landing pad with no clauses must have the cleanup flag set. It is + // theoretically possible, though highly unlikely, that we eliminated all + // clauses. If so, force the cleanup flag to true. + if (NewClauses.empty()) + CleanupFlag = true; + NLI->setCleanup(CleanupFlag); + return NLI; + } + + // Even if none of the clauses changed, we may nonetheless have understood + // that the cleanup flag is pointless. Clear it if so. + if (LI.isCleanup() != CleanupFlag) { + assert(!CleanupFlag && "Adding a cleanup, not removing one?!"); + LI.setCleanup(CleanupFlag); + return &LI; + } + return 0; } @@ -923,7 +1758,8 @@ assert(I->hasOneUse() && "Invariants didn't hold!"); // Cannot move control-flow-involving, volatile loads, vaarg, etc. - if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I)) + if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() || + isa<TerminatorInst>(I)) return false; // Do not sink alloca instructions out of the entry block. @@ -940,8 +1776,7 @@ return false; } - BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); - + BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); I->moveBefore(InsertPos); ++NumSunkInst; return true; @@ -964,12 +1799,10 @@ bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; Worklist.push_back(BB); - - std::vector<Instruction*> InstrsForInstCombineWorklist; - InstrsForInstCombineWorklist.reserve(128); - SmallPtrSet<ConstantExpr*, 64> FoldedConstants; - + SmallVector<Instruction*, 128> InstrsForInstCombineWorklist; + DenseMap<ConstantExpr*, Constant*> FoldedConstants; + do { BB = Worklist.pop_back_val(); @@ -1004,14 +1837,15 @@ i != e; ++i) { ConstantExpr *CE = dyn_cast<ConstantExpr>(i); if (CE == 0) continue; - - // If we already folded this constant, don't try again. - if (!FoldedConstants.insert(CE)) - continue; - - Constant *NewC = ConstantFoldConstantExpression(CE, TD); - if (NewC && NewC != CE) { - *i = NewC; + + Constant*& FoldRes = FoldedConstants[CE]; + if (!FoldRes) + FoldRes = ConstantFoldConstantExpression(CE, TD); + if (!FoldRes) + FoldRes = CE; + + if (FoldRes != CE) { + *i = FoldRes; MadeIRChange = true; } } @@ -1077,27 +1911,29 @@ // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents // the instcombine code from having to deal with some bad special cases. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (!Visited.count(BB)) { - Instruction *Term = BB->getTerminator(); - while (Term != BB->begin()) { // Remove instrs bottom-up - BasicBlock::iterator I = Term; --I; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (Visited.count(BB)) continue; - DEBUG(errs() << "IC: DCE: " << *I << '\n'); - // A debug intrinsic shouldn't force another iteration if we weren't - // going to do one without it. - if (!ISA_DEBUG_INFO_INTRINSIC(I)) { - ++NumDeadInst; - MadeIRChange = true; - } - - // If I is not void type then replaceAllUsesWith undef. - // This allows ValueHandlers and custom metadata to adjust itself. - if (!I->getType()->isVoidTy()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); - I->eraseFromParent(); + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. + Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != BB->begin()) { + // Delete the next to last instruction. + BasicBlock::iterator I = EndInst; + Instruction *Inst = --I; + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (isa<LandingPadInst>(Inst)) { + EndInst = Inst; + continue; } + if (!isa<DbgInfoIntrinsic>(Inst)) { + ++NumDeadInst; + MadeIRChange = true; + } + Inst->eraseFromParent(); } + } } while (!Worklist.isEmpty()) { @@ -1158,6 +1994,7 @@ // Now that we have an instruction, try combining it to simplify it. Builder->SetInsertPoint(I->getParent(), I); + Builder->SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG std::string OrigI; @@ -1172,23 +2009,26 @@ DEBUG(errs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); + if (!I->getDebugLoc().isUnknown()) + Result->setDebugLoc(I->getDebugLoc()); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); + // Move the name to the new instruction first. + Result->takeName(I); + // Push the new instruction and any users onto the worklist. Worklist.Add(Result); Worklist.AddUsersToWorkList(*Result); - // Move the name to the new instruction first. - Result->takeName(I); - // Insert the new instruction into the basic block... BasicBlock *InstParent = I->getParent(); BasicBlock::iterator InsertPos = I; - if (!isa<PHINode>(Result)) // If combining a PHI, don't insert - while (isa<PHINode>(InsertPos)) // middle of a block of PHIs. - ++InsertPos; + // If we replace a PHI with something that isn't a PHI, fix up the + // insertion point. + if (!isa<PHINode>(Result) && isa<PHINode>(InsertPos)) + InsertPos = InstParent->getFirstInsertionPt(); InstParent->getInstList().insert(InsertPos, Result); @@ -1218,19 +2058,22 @@ bool InstCombiner::runOnFunction(Function &F) { - MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); TD = getAnalysisIfAvailable<TargetData>(); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. - IRBuilder<TargetFolder, InstCombineIRInserter> + IRBuilder<true, TargetFolder, InstCombineIRInserter> TheBuilder(F.getContext(), TargetFolder(TD), InstCombineIRInserter(Worklist)); Builder = &TheBuilder; bool EverMadeChange = false; + // Lower dbg.declare intrinsics otherwise their value may be clobbered + // by instcombiner. + EverMadeChange = LowerDbgDeclare(F); + // Iterate while there is work to do. unsigned Iteration = 0; while (DoOneIteration(F, Iteration++))
diff --git a/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj b/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj new file mode 100644 index 0000000..2fa2c4c --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj
@@ -0,0 +1,376 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Debug|x64"> + <Configuration>Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|Win32"> + <Configuration>Profile</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|x64"> + <Configuration>Profile</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|x64"> + <Configuration>Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{D35C7204-D4E0-4EE5-8B6D-BA1B589F5D36}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>LLVMInstCombine</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.lib</TargetExt> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</IntDir> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">.lib</TargetExt> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Debug/LLVMInstCombine.pdb</ProgramDataBaseFileName> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Debug/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <OmitFramePointers>false</OmitFramePointers> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemGroup> + <ClCompile Include="InstructionCombining.cpp" /> + <ClCompile Include="InstCombineAddSub.cpp" /> + <ClCompile Include="InstCombineAndOrXor.cpp" /> + <ClCompile Include="InstCombineCalls.cpp" /> + <ClCompile Include="InstCombineCasts.cpp" /> + <ClCompile Include="InstCombineCompares.cpp" /> + <ClCompile Include="InstCombineLoadStoreAlloca.cpp" /> + <ClCompile Include="InstCombineMulDivRem.cpp" /> + <ClCompile Include="InstCombinePHI.cpp" /> + <ClCompile Include="InstCombineSelect.cpp" /> + <ClCompile Include="InstCombineShifts.cpp" /> + <ClCompile Include="InstCombineSimplifyDemanded.cpp" /> + <ClCompile Include="InstCombineVectorOps.cpp" /> + <ClInclude Include="InstCombine.h" /> + <ClInclude Include="InstCombineWorklist.h" /> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\Analysis/LLVMAnalysis.vcxproj"> + <Project>97EDF19C-6360-4770-9255-EBA2F1A13E9B</Project> + </ProjectReference> + <ProjectReference Include="..\..\VMCore/LLVMCore.vcxproj"> + <Project>00F3295C-F7A0-43D3-BD0B-1BC0515B30E1</Project> + </ProjectReference> + <ProjectReference Include="..\..\Support/LLVMSupport.vcxproj"> + <Project>C688DD59-C6CB-4B33-B56F-A7D6F3761524</Project> + </ProjectReference> + <ProjectReference Include="..\..\Target/LLVMTarget.vcxproj"> + <Project>76F7B8C2-C825-40DC-BB68-9D987275E320</Project> + </ProjectReference> + <ProjectReference Include="..\Utils/LLVMTransformUtils.vcxproj"> + <Project>5C514254-58EE-4850-8743-F5D7BEAA3E66</Project> + </ProjectReference> + <ProjectReference Include="..\..\..\include/llvm/intrinsics_gen.vcxproj"> + <Project>E9B87B46-1EB0-4D95-9049-41B148FBADCD</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj.filters b/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj.filters new file mode 100644 index 0000000..a6ead53 --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj.filters
@@ -0,0 +1,66 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <ClCompile Include="InstructionCombining.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineAddSub.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineAndOrXor.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineCalls.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineCasts.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineCompares.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineLoadStoreAlloca.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineMulDivRem.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombinePHI.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineSelect.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineShifts.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineSimplifyDemanded.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineVectorOps.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + </ItemGroup> + <ItemGroup> + <ClInclude Include="InstCombine.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="InstCombineWorklist.h"> + <Filter>Header Files</Filter> + </ClInclude> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{1733179C-6FE4-462E-9EA5-4A29A1ACFE25}</UniqueIdentifier> + </Filter> + <Filter Include="Header Files"> + <UniqueIdentifier>{CFA0CD99-0550-4E94-A4D9-080C3F5D695C}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj b/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj new file mode 100644 index 0000000..62e3273 --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj
@@ -0,0 +1,277 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>PACKAGE</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj.filters b/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj.filters new file mode 100644 index 0000000..a570359 --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>