Update SwiftShader to April code dump. April code dump from Transgaming. Adds new shader compiler.
diff --git a/src/LLVM/lib/Transforms/Hello/Hello.cpp b/src/LLVM/lib/Transforms/Hello/Hello.cpp index 838d550..b0e22de 100644 --- a/src/LLVM/lib/Transforms/Hello/Hello.cpp +++ b/src/LLVM/lib/Transforms/Hello/Hello.cpp
@@ -37,7 +37,7 @@ } char Hello::ID = 0; -INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false); +static RegisterPass<Hello> X("hello", "Hello World Pass"); namespace { // Hello2 - The second implementation with getAnalysisUsage implemented. @@ -60,6 +60,5 @@ } char Hello2::ID = 0; -INITIALIZE_PASS(Hello2, "hello2", - "Hello World Pass (with getAnalysisUsage implemented)", - false, false); +static RegisterPass<Hello2> +Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
diff --git a/src/LLVM/lib/Transforms/Hello/Hello.exports b/src/LLVM/lib/Transforms/Hello/Hello.exports new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/LLVM/lib/Transforms/Hello/Hello.exports
diff --git a/src/LLVM/lib/Transforms/Hello/INSTALL.vcxproj b/src/LLVM/lib/Transforms/Hello/INSTALL.vcxproj new file mode 100644 index 0000000..1b2f4c9 --- /dev/null +++ b/src/LLVM/lib/Transforms/Hello/INSTALL.vcxproj
@@ -0,0 +1,261 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>INSTALL</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + 
<OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Hello/INSTALL.vcxproj.filters b/src/LLVM/lib/Transforms/Hello/INSTALL.vcxproj.filters new file mode 100644 index 0000000..251dd1d --- /dev/null +++ b/src/LLVM/lib/Transforms/Hello/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Hello/LLVMHello.vcxproj b/src/LLVM/lib/Transforms/Hello/LLVMHello.vcxproj new file mode 100644 index 0000000..f2cd27a --- /dev/null +++ b/src/LLVM/lib/Transforms/Hello/LLVMHello.vcxproj
@@ -0,0 +1,208 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{935A4E63-44A0-43BB-BAB3-50BB14735609}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>LLVMHello</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>Utility</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType>Utility</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType>Utility</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType>Utility</ConfigurationType> + 
<UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + 
<InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\LLVMHello.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/LLVMHello.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\LLVMHello</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal 
+:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/LLVMHello.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\LLVMHello</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/LLVMHello.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\LLVMHello</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/LLVMHello.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\LLVMHello</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <None Include="CMakeFiles\LLVMHello" /> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Hello/LLVMHello.vcxproj.filters b/src/LLVM/lib/Transforms/Hello/LLVMHello.vcxproj.filters new file mode 100644 index 0000000..b89b070 --- /dev/null +++ b/src/LLVM/lib/Transforms/Hello/LLVMHello.vcxproj.filters
@@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\LLVMHello.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + <None Include="CMakeFiles\LLVMHello" /> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Hello/PACKAGE.vcxproj b/src/LLVM/lib/Transforms/Hello/PACKAGE.vcxproj new file mode 100644 index 0000000..7cdb79b --- /dev/null +++ b/src/LLVM/lib/Transforms/Hello/PACKAGE.vcxproj
@@ -0,0 +1,277 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>PACKAGE</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + 
</PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + 
</ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Hello;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Hello/PACKAGE.vcxproj.filters b/src/LLVM/lib/Transforms/Hello/PACKAGE.vcxproj.filters new file mode 100644 index 0000000..a570359 --- /dev/null +++ b/src/LLVM/lib/Transforms/Hello/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/INSTALL.vcxproj b/src/LLVM/lib/Transforms/INSTALL.vcxproj new file mode 100644 index 0000000..00466f3 --- /dev/null +++ b/src/LLVM/lib/Transforms/INSTALL.vcxproj
@@ -0,0 +1,261 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>INSTALL</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Transforms;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Transforms;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Transforms;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Transforms;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + 
<OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/INSTALL.vcxproj.filters b/src/LLVM/lib/Transforms/INSTALL.vcxproj.filters new file mode 100644 index 0000000..251dd1d --- /dev/null +++ b/src/LLVM/lib/Transforms/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/IPO/ArgumentPromotion.cpp b/src/LLVM/lib/Transforms/IPO/ArgumentPromotion.cpp index 0c77e1f..e160f63 100644 --- a/src/LLVM/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/src/LLVM/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -39,7 +39,6 @@ #include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Target/TargetData.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -67,7 +66,9 @@ virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), maxElements(maxElements) {} + : CallGraphSCCPass(ID), maxElements(maxElements) { + initializeArgPromotionPass(*PassRegistry::getPassRegistry()); + } /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; @@ -84,8 +85,12 @@ } char ArgPromotion::ID = 0; -INITIALIZE_PASS(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, false); +INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", + "Promote 'by reference' arguments to scalars", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(ArgPromotion, "argpromotion", + "Promote 'by reference' arguments to scalars", false, false) Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { return new ArgPromotion(maxElements); @@ -130,47 +135,74 @@ if (PointerArgs.empty()) return 0; // Second check: make sure that all callers are direct callers. We can't - // transform functions that have indirect callers. - if (F->hasAddressTaken()) - return 0; - + // transform functions that have indirect callers. Also see if the function + // is self-recursive. + bool isSelfRecursive = false; + for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); + UI != E; ++UI) { + CallSite CS(*UI); + // Must be a direct call. + if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0; + + if (CS.getInstruction()->getParent()->getParent() == F) + isSelfRecursive = true; + } + // Check to see which arguments are promotable. 
If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument*, 8> ArgsToPromote; SmallPtrSet<Argument*, 8> ByValArgsToTransform; for (unsigned i = 0; i != PointerArgs.size(); ++i) { bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal); + Argument *PtrArg = PointerArgs[i].first; + Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe. - Argument *PtrArg = PointerArgs[i].first; if (isByVal) { - const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); - if (const StructType *STy = dyn_cast<StructType>(AgTy)) { + if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" << " than " << maxElements << " arguments to the function.\n"); - } else { - // If all the elements are single-value types, we can promote it. - bool AllSimple = true; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - if (!STy->getElementType(i)->isSingleValueType()) { - AllSimple = false; - break; - } - - // Safe to transform, don't even bother trying to "promote" it. - // Passing the elements as a scalar will allow scalarrepl to hack on - // the new alloca we introduce. - if (AllSimple) { - ByValArgsToTransform.insert(PtrArg); - continue; + continue; + } + + // If all the elements are single-value types, we can promote it. + bool AllSimple = true; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + if (!STy->getElementType(i)->isSingleValueType()) { + AllSimple = false; + break; } } + + // Safe to transform, don't even bother trying to "promote" it. + // Passing the elements as a scalar will allow scalarrepl to hack on + // the new alloca we introduce. 
+ if (AllSimple) { + ByValArgsToTransform.insert(PtrArg); + continue; + } } } + // If the argument is a recursive type and we're in a recursive + // function, we could end up infinitely peeling the function argument. + if (isSelfRecursive) { + if (StructType *STy = dyn_cast<StructType>(AgTy)) { + bool RecursiveType = false; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + if (STy->getElementType(i) == PtrArg->getType()) { + RecursiveType = true; + break; + } + } + if (RecursiveType) + continue; + } + } + // Otherwise, see if we can promote the pointer to its value. if (isSafeToPromoteArgument(PtrArg, isByVal)) ArgsToPromote.insert(PtrArg); @@ -183,22 +215,9 @@ return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); } -/// IsAlwaysValidPointer - Return true if the specified pointer is always legal -/// to load. -static bool IsAlwaysValidPointer(Value *V) { - if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true; - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) - return IsAlwaysValidPointer(GEP->getOperand(0)); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::GetElementPtr) - return IsAlwaysValidPointer(CE->getOperand(0)); - - return false; -} - -/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that +/// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. 
-static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { Function *Callee = Arg->getParent(); unsigned ArgNo = std::distance(Callee->arg_begin(), @@ -211,7 +230,7 @@ CallSite CS(*UI); assert(CS && "Should only have direct calls!"); - if (!IsAlwaysValidPointer(CS.getArgument(ArgNo))) + if (!CS.getArgument(ArgNo)->isDereferenceablePointer()) return false; } return true; @@ -318,7 +337,7 @@ GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByVal || AllCalleesPassInValidPointerForArgument(Arg)) + if (isByVal || AllCallersPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -363,7 +382,8 @@ User *U = *UI; Operands.clear(); if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - if (LI->isVolatile()) return false; // Don't hack volatile loads + // Don't hack volatile/atomic loads + if (!LI->isSimple()) return false; Loads.push_back(LI); // Direct loads are equivalent to a GEP with a zero index and then a load. Operands.push_back(0); @@ -391,7 +411,8 @@ for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end(); UI != E; ++UI) if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { - if (LI->isVolatile()) return false; // Don't hack volatile loads + // Don't hack volatile/atomic loads + if (!LI->isSimple()) return false; Loads.push_back(LI); } else { // Other uses than load? @@ -434,8 +455,6 @@ SmallPtrSet<BasicBlock*, 16> TranspBlocks; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - TargetData *TD = getAnalysisIfAvailable<TargetData>(); - if (!TD) return false; // Without TargetData, assume the worst. 
for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to @@ -443,11 +462,8 @@ LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); - const PointerType *LoadTy = - cast<PointerType>(Load->getPointerOperand()->getType()); - unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType()); - - if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize)) + AliasAnalysis::Location Loc = AA.getLocation(Load); + if (AA.canInstructionRangeModify(BB->front(), *Load, Loc)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. @@ -458,7 +474,7 @@ for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > I = idf_ext_begin(P, TranspBlocks), E = idf_ext_end(P, TranspBlocks); I != E; ++I) - if (AA.canBasicBlockModify(**I, Arg, LoadSize)) + if (AA.canBasicBlockModify(**I, Loc)) return false; } } @@ -478,8 +494,8 @@ // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. - const FunctionType *FTy = F->getFunctionType(); - std::vector<const Type*> Params; + FunctionType *FTy = F->getFunctionType(); + std::vector<Type*> Params; typedef std::set<IndicesVector> ScalarizeTable; @@ -513,8 +529,8 @@ ++I, ++ArgIndex) { if (ByValArgsToTransform.count(I)) { // Simple byval argument? Just add all the struct element types. 
- const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); - const StructType *STy = cast<StructType>(AgTy); + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + StructType *STy = cast<StructType>(AgTy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) Params.push_back(STy->getElementType(i)); ++NumByValArgsPromoted; @@ -562,9 +578,7 @@ for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), - SI->begin(), - SI->end())); + Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI)); assert(Params.back()); } @@ -579,7 +593,7 @@ if (Attributes attrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); - const Type *RetTy = FTy->getReturnType(); + Type *RetTy = FTy->getReturnType(); // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which // have zero fixed arguments. @@ -648,13 +662,13 @@ } else if (ByValArgsToTransform.count(I)) { // Emit a GEP and load for each element of the struct. - const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); - const StructType *STy = cast<StructType>(AgTy); + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, + Value *Idx = GetElementPtrInst::Create(*AI, Idxs, (*AI)->getName()+"."+utostr(i), Call); // TODO: Tell AA about the new values? 
@@ -672,12 +686,12 @@ LoadInst *OrigLoad = OriginalLoads[*SI]; if (!SI->empty()) { Ops.reserve(SI->size()); - const Type *ElTy = V->getType(); + Type *ElTy = V->getType(); for (IndicesVector::const_iterator II = SI->begin(), IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. - const Type *IdxTy = (ElTy->isStructTy() ? + Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); @@ -685,8 +699,7 @@ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. - V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), - V->getName()+".idx", Call); + V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } @@ -694,6 +707,9 @@ // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); + // Transfer the TBAA info too. + newLoad->setMetadata(LLVMContext::MD_tbaa, + OrigLoad->getMetadata(LLVMContext::MD_tbaa)); Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } @@ -716,12 +732,12 @@ Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); + Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); @@ -754,8 +770,8 @@ // function empty. 
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { @@ -775,16 +791,16 @@ Instruction *InsertPt = NF->begin()->begin(); // Just add all the struct element types. - const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); - const StructType *STy = cast<StructType>(AgTy); + StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = - GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, + GetElementPtrInst::Create(TheAlloca, Idxs, TheAlloca->getName()+"."+Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i));
diff --git a/src/LLVM/lib/Transforms/IPO/ConstantMerge.cpp b/src/LLVM/lib/Transforms/IPO/ConstantMerge.cpp index 75282fa..c3ecb7a 100644 --- a/src/LLVM/lib/Transforms/IPO/ConstantMerge.cpp +++ b/src/LLVM/lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,10 +19,14 @@ #define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -31,25 +35,80 @@ namespace { struct ConstantMerge : public ModulePass { static char ID; // Pass identification, replacement for typeid - ConstantMerge() : ModulePass(ID) {} + ConstantMerge() : ModulePass(ID) { + initializeConstantMergePass(*PassRegistry::getPassRegistry()); + } - // run - For this pass, process all of the globals in the module, - // eliminating duplicate constants. - // + // For this pass, process all of the globals in the module, eliminating + // duplicate constants. bool runOnModule(Module &M); + + // Return true iff we can determine the alignment of this global variable. + bool hasKnownAlignment(GlobalVariable *GV) const; + + // Return the alignment of the global, including converting the default + // alignment to a concrete value. + unsigned getAlignment(GlobalVariable *GV) const; + + const TargetData *TD; }; } char ConstantMerge::ID = 0; INITIALIZE_PASS(ConstantMerge, "constmerge", - "Merge Duplicate Global Constants", false, false); + "Merge Duplicate Global Constants", false, false) ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } + + +/// Find values that are marked as llvm.used. +static void FindUsedValues(GlobalVariable *LLVMUsed, + SmallPtrSet<const GlobalValue*, 8> &UsedValues) { + if (LLVMUsed == 0) return; + ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); + if (Inits == 0) return; + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) + if (GlobalValue *GV = + dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) + UsedValues.insert(GV); +} + +// True if A is better than B. 
+static bool IsBetterCannonical(const GlobalVariable &A, + const GlobalVariable &B) { + if (!A.hasLocalLinkage() && B.hasLocalLinkage()) + return true; + + if (A.hasLocalLinkage() && !B.hasLocalLinkage()) + return false; + + return A.hasUnnamedAddr(); +} + +bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const { + return TD || GV->getAlignment() != 0; +} + +unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { + if (TD) + return TD->getPreferredAlignment(GV); + return GV->getAlignment(); +} + bool ConstantMerge::runOnModule(Module &M) { - // Map unique constant/section pairs to globals. We don't want to merge - // globals in different sections. - DenseMap<Constant*, GlobalVariable*> CMap; + TD = getAnalysisIfAvailable<TargetData>(); + + // Find all the globals that are marked "used". These cannot be merged. + SmallPtrSet<const GlobalValue*, 8> UsedGlobals; + FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); + FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); + + // Map unique <constants, has-known-alignment> pairs to globals. We don't + // want to merge globals of unknown alignment with those of explicit + // alignment. If we have TargetData, we always know the alignment. + DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap; // Replacements - This vector contains a list of replacements to perform. SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; @@ -61,40 +120,78 @@ // second level constants have initializers which point to the globals that // were just merged. while (1) { - // First pass: identify all globals that can be merged together, filling in - // the Replacements vector. We cannot do the replacement in this pass - // because doing so may cause initializers of other globals to be rewritten, - // invalidating the Constant* pointers in CMap. - // + + // First: Find the canonical constants others will be merged with. 
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = GVI++; - + // If this GV is dead, remove it. GV->removeDeadConstantUsers(); if (GV->use_empty() && GV->hasLocalLinkage()) { GV->eraseFromParent(); continue; } - - // Only process constants with initializers in the default addres space. - if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() || - GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty()) + + // Only process constants with initializers in the default address space. + if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || GV->hasSection() || + // Don't touch values marked with attribute(used). + UsedGlobals.count(GV)) continue; - + Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. - GlobalVariable *&Slot = CMap[Init]; + PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); + GlobalVariable *&Slot = CMap[Pair]; - if (Slot == 0) { // Nope, add it to the map. + // If this is the first constant we find or if the old one is local, + // replace with the current one. If the current is externally visible + // it cannot be replaced, but can be the canonical constant we merge with. + if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) { Slot = GV; - } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! - // Make all uses of the duplicate constant use the canonical version. - Replacements.push_back(std::make_pair(GV, Slot)); } } + // Second: identify all globals that can be merged together, filling in + // the Replacements vector. We cannot do the replacement in this pass + // because doing so may cause initializers of other globals to be rewritten, + // invalidating the Constant* pointers in CMap. 
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = GVI++; + + // Only process constants with initializers in the default address space. + if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || GV->hasSection() || + // Don't touch values marked with attribute(used). + UsedGlobals.count(GV)) + continue; + + // We can only replace constant with local linkage. + if (!GV->hasLocalLinkage()) + continue; + + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); + GlobalVariable *Slot = CMap[Pair]; + + if (!Slot || Slot == GV) + continue; + + if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr()) + continue; + + if (!GV->hasUnnamedAddr()) + Slot->setUnnamedAddr(false); + + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(GV, Slot)); + } + if (Replacements.empty()) return MadeChange; CMap.clear(); @@ -103,10 +200,20 @@ // now. This avoid invalidating the pointers in CMap, which are unneeded // now. for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { + // Bump the alignment if necessary. + if (Replacements[i].first->getAlignment() || + Replacements[i].second->getAlignment()) { + Replacements[i].second->setAlignment(std::max( + Replacements[i].first->getAlignment(), + Replacements[i].second->getAlignment())); + } + // Eliminate any uses of the dead global. Replacements[i].first->replaceAllUsesWith(Replacements[i].second); // Delete the global value from the module. + assert(Replacements[i].first->hasLocalLinkage() && + "Refusing to delete an externally visible global variable."); Replacements[i].first->eraseFromParent(); }
diff --git a/src/LLVM/lib/Transforms/IPO/DeadArgumentElimination.cpp b/src/LLVM/lib/Transforms/IPO/DeadArgumentElimination.cpp index 47df235..4bb6f7a 100644 --- a/src/LLVM/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/src/LLVM/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -39,7 +39,8 @@ STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); - +STATISTIC(NumArgumentsReplacedWithUndef, + "Number of unread args replaced with undef"); namespace { /// DAE - The dead argument elimination pass. /// @@ -48,7 +49,7 @@ /// Struct that represents (part of) either a return value or a function /// argument. Used so that arguments and return values can be used - /// interchangably. + /// interchangeably. struct RetOrArg { RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), IsArg(IsArg) {} @@ -126,7 +127,9 @@ public: static char ID; // Pass identification, replacement for typeid - DAE() : ModulePass(ID) {} + DAE() : ModulePass(ID) { + initializeDAEPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); @@ -146,12 +149,13 @@ void PropagateLiveness(const RetOrArg &RA); bool RemoveDeadStuffFromFunction(Function *F); bool DeleteDeadVarargs(Function &Fn); + bool RemoveDeadArgumentsFromCallers(Function &Fn); }; } char DAE::ID = 0; -INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false); +INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false) namespace { /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but @@ -168,7 +172,7 @@ char DAH::ID = 0; INITIALIZE_PASS(DAH, "deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", - false, false); + false, false) /// createDeadArgEliminationPass - This pass removes arguments from functions /// which are not used by the body of the function. @@ -202,9 +206,9 @@ // Start by computing a new prototype for the function, which is the same as // the old function, but doesn't have isVarArg set. 
- const FunctionType *FTy = Fn.getFunctionType(); + FunctionType *FTy = Fn.getFunctionType(); - std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); + std::vector<Type*> Params(FTy->param_begin(), FTy->param_end()); FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); unsigned NumArgs = Params.size(); @@ -240,11 +244,11 @@ Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); + Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(PAL); } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(PAL); if (cast<CallInst>(Call)->isTailCall()) @@ -269,8 +273,8 @@ // function empty. NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. While we're at + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. While we're at // it, remove the dead arguments from the DeadArguments list. // for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), @@ -285,13 +289,62 @@ return true; } +/// RemoveDeadArgumentsFromCallers - Checks if the given function has any +/// arguments that are unused, and changes the caller parameters to be undefined +/// instead. +bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) +{ + if (Fn.isDeclaration() || Fn.mayBeOverridden()) + return false; + + // Functions with local linkage should already have been handled. 
+ if (Fn.hasLocalLinkage()) + return false; + + if (Fn.use_empty()) + return false; + + llvm::SmallVector<unsigned, 8> UnusedArgs; + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); + I != E; ++I) { + Argument *Arg = I; + + if (Arg->use_empty() && !Arg->hasByValAttr()) + UnusedArgs.push_back(Arg->getArgNo()); + } + + if (UnusedArgs.empty()) + return false; + + bool Changed = false; + + for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end(); + I != E; ++I) { + CallSite CS(*I); + if (!CS || !CS.isCallee(I)) + continue; + + // Now go through all unused args and replace them with "undef". + for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) { + unsigned ArgNo = UnusedArgs[I]; + + Value *Arg = CS.getArgument(ArgNo); + CS.setArgument(ArgNo, UndefValue::get(Arg->getType())); + ++NumArgumentsReplacedWithUndef; + Changed = true; + } + } + + return Changed; +} + /// Convenience function that returns the number of return values. It returns 0 /// for void functions and 1 for functions not returning a struct. It returns /// the number of struct elements for functions returning a struct. static unsigned NumRetVals(const Function *F) { if (F->getReturnType()->isVoidTy()) return 0; - else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) + else if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) return STy->getNumElements(); else return 1; @@ -326,7 +379,7 @@ // The value is returned from a function. It's only live when the // function's return value is live. We use RetValNum here, for the case // that U is really a use of an insertvalue instruction that uses the - // orginal Use. + // original Use. RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum); // We might be live, depending on the liveness of Use. return MarkIfNotLive(Use, MaybeLiveUses); @@ -438,7 +491,7 @@ // Keep track of the number of live retvals, so we can skip checks once all // of them turn out to be live. 
unsigned NumLiveRetVals = 0; - const Type *STy = dyn_cast<StructType>(F.getReturnType()); + Type *STy = dyn_cast<StructType>(F.getReturnType()); // Loop all uses of the function. for (Value::const_use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) { @@ -593,8 +646,8 @@ // Start by computing a new prototype for the function, which is the same as // the old function, but has fewer arguments and a different return type. - const FunctionType *FTy = F->getFunctionType(); - std::vector<const Type*> Params; + FunctionType *FTy = F->getFunctionType(); + std::vector<Type*> Params; // Set up to build a new list of parameter attributes. SmallVector<AttributeWithIndex, 8> AttributesVec; @@ -606,17 +659,17 @@ // Find out the new return value. - const Type *RetTy = FTy->getReturnType(); - const Type *NRetTy = NULL; + Type *RetTy = FTy->getReturnType(); + Type *NRetTy = NULL; unsigned RetCount = NumRetVals(F); // -1 means unused, other numbers are the new index SmallVector<int, 5> NewRetIdxs(RetCount, -1); - std::vector<const Type*> RetTypes; + std::vector<Type*> RetTypes; if (RetTy->isVoidTy()) { NRetTy = RetTy; } else { - const StructType *STy = dyn_cast<StructType>(RetTy); + StructType *STy = dyn_cast<StructType>(RetTy); if (STy) // Look at each of the original return values individually. 
for (unsigned i = 0; i != RetCount; ++i) { @@ -769,11 +822,11 @@ Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); + Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewCallPAL); } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(NewCallPAL); if (cast<CallInst>(Call)->isTailCall()) @@ -791,7 +844,8 @@ } else if (New->getType()->isVoidTy()) { // Our return value has uses, but they will get removed later on. // Replace by null for now. - Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); + if (!Call->getType()->isX86_MMXTy()) + Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); } else { assert(RetTy->isStructTy() && "Return type changed, but not into a void. The old return type" @@ -840,8 +894,8 @@ // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. i = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++i) @@ -854,7 +908,8 @@ } else { // If this argument is dead, replace any uses of it with null constants // (these are guaranteed to become unused later on). 
- I->replaceAllUsesWith(Constant::getNullValue(I->getType())); + if (!I->getType()->isX86_MMXTy()) + I->replaceAllUsesWith(Constant::getNullValue(I->getType())); } // If we change the return value of the function we must rewrite any return @@ -935,5 +990,14 @@ Function *F = I++; Changed |= RemoveDeadStuffFromFunction(F); } + + // Finally, look for any unused parameters in functions with non-local + // linkage and replace the passed in parameters with undef. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function& F = *I; + + Changed |= RemoveDeadArgumentsFromCallers(F); + } + return Changed; }
diff --git a/src/LLVM/lib/Transforms/IPO/ExtractGV.cpp b/src/LLVM/lib/Transforms/IPO/ExtractGV.cpp index 9eb690d..d9911bf 100644 --- a/src/LLVM/lib/Transforms/IPO/ExtractGV.cpp +++ b/src/LLVM/lib/Transforms/IPO/ExtractGV.cpp
@@ -17,15 +17,15 @@ #include "llvm/Pass.h" #include "llvm/Constants.h" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SetVector.h" #include <algorithm> using namespace llvm; namespace { /// @brief A pass to extract specific functions and their dependencies. class GVExtractorPass : public ModulePass { - std::vector<GlobalValue*> Named; + SetVector<GlobalValue *> Named; bool deleteStuff; - bool reLink; public: static char ID; // Pass identification, replacement for typeid @@ -33,133 +33,50 @@ /// specified function. Otherwise, it deletes as much of the module as /// possible, except for the function specified. /// - explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true, - bool relinkCallees = false) - : ModulePass(ID), Named(GVs), deleteStuff(deleteS), - reLink(relinkCallees) {} + explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true) + : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {} bool runOnModule(Module &M) { - if (Named.size() == 0) { - return false; // Nothing to extract - } - - - if (deleteStuff) - return deleteGV(); - M.setModuleInlineAsm(""); - return isolateGV(M); - } + // Visit the global inline asm. + if (!deleteStuff) + M.setModuleInlineAsm(""); - bool deleteGV() { - for (std::vector<GlobalValue*>::iterator GI = Named.begin(), - GE = Named.end(); GI != GE; ++GI) { - if (Function* NamedFunc = dyn_cast<Function>(*GI)) { - // If we're in relinking mode, set linkage of all internal callees to - // external. 
This will allow us extract function, and then - link - // everything together - if (reLink) { - for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end(); - B != BE; ++B) { - for (BasicBlock::iterator I = B->begin(), E = B->end(); - I != E; ++I) { - if (CallInst* callInst = dyn_cast<CallInst>(&*I)) { - Function* Callee = callInst->getCalledFunction(); - if (Callee && Callee->hasLocalLinkage()) - Callee->setLinkage(GlobalValue::ExternalLinkage); - } - } - } - } - - NamedFunc->setLinkage(GlobalValue::ExternalLinkage); - NamedFunc->deleteBody(); - assert(NamedFunc->isDeclaration() && "This didn't make the function external!"); - } else { - if (!(*GI)->isDeclaration()) { - cast<GlobalVariable>(*GI)->setInitializer(0); //clear the initializer - (*GI)->setLinkage(GlobalValue::ExternalLinkage); - } - } - } - return true; - } + // For simplicity, just give all GlobalValues ExternalLinkage. A trickier + // implementation could figure out which GlobalValues are actually + // referenced by the Named set, and which GlobalValues in the rest of + // the module are referenced by the NamedSet, and get away with leaving + // more internal and private things internal and private. But for now, + // be conservative and simple. - bool isolateGV(Module &M) { - // Mark all globals internal - // FIXME: what should we do with private linkage? - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) - if (!I->isDeclaration()) { - I->setLinkage(GlobalValue::InternalLinkage); - } - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration()) { - I->setLinkage(GlobalValue::InternalLinkage); - } + // Visit the GlobalVariables. 
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) { + I->setInitializer(0); + } else { + if (I->hasAvailableExternallyLinkage()) + continue; + if (I->getName() == "llvm.global_ctors") + continue; + } - // Make sure our result is globally accessible... - // by putting them in the used array - { - std::vector<Constant *> AUGs; - const Type *SBP= - Type::getInt8PtrTy(M.getContext()); - for (std::vector<GlobalValue*>::iterator GI = Named.begin(), - GE = Named.end(); GI != GE; ++GI) { - (*GI)->setLinkage(GlobalValue::ExternalLinkage); - AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP)); - } - ArrayType *AT = ArrayType::get(SBP, AUGs.size()); - Constant *Init = ConstantArray::get(AT, AUGs); - GlobalValue *gv = new GlobalVariable(M, AT, false, - GlobalValue::AppendingLinkage, - Init, "llvm.used"); - gv->setSection("llvm.metadata"); + if (I->hasLocalLinkage()) + I->setVisibility(GlobalValue::HiddenVisibility); + I->setLinkage(GlobalValue::ExternalLinkage); } - // All of the functions may be used by global variables or the named - // globals. Loop through them and create a new, external functions that - // can be "used", instead of ones with bodies. - std::vector<Function*> NewFunctions; + // Visit the Functions. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) { + I->deleteBody(); + } else { + if (I->hasAvailableExternallyLinkage()) + continue; + } - Function *Last = --M.end(); // Figure out where the last real fn is. 
- - for (Module::iterator I = M.begin(); ; ++I) { - if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) { - Function *New = Function::Create(I->getFunctionType(), - GlobalValue::ExternalLinkage); - New->copyAttributesFrom(I); - - // If it's not the named function, delete the body of the function - I->dropAllReferences(); - - M.getFunctionList().push_back(New); - NewFunctions.push_back(New); - New->takeName(I); - } - - if (&*I == Last) break; // Stop after processing the last function - } - - // Now that we have replacements all set up, loop through the module, - // deleting the old functions, replacing them with the newly created - // functions. - if (!NewFunctions.empty()) { - unsigned FuncNum = 0; - Module::iterator I = M.begin(); - do { - if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) { - // Make everything that uses the old function use the new dummy fn - I->replaceAllUsesWith(NewFunctions[FuncNum++]); - - Function *Old = I; - ++I; // Move the iterator to the new function - - // Delete the old function! - M.getFunctionList().erase(Old); - - } else { - ++I; // Skip the function we are extracting - } - } while (&*I != NewFunctions[0]); + if (I->hasLocalLinkage()) + I->setVisibility(GlobalValue::HiddenVisibility); + I->setLinkage(GlobalValue::ExternalLinkage); } return true; @@ -170,6 +87,6 @@ } ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs, - bool deleteFn, bool relinkCallees) { - return new GVExtractorPass(GVs, deleteFn, relinkCallees); + bool deleteFn) { + return new GVExtractorPass(GVs, deleteFn); }
diff --git a/src/LLVM/lib/Transforms/IPO/FunctionAttrs.cpp b/src/LLVM/lib/Transforms/IPO/FunctionAttrs.cpp index 6165ba0..0edf342 100644 --- a/src/LLVM/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/src/LLVM/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -23,10 +23,10 @@ #include "llvm/CallGraphSCCPass.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" @@ -41,7 +41,9 @@ namespace { struct FunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID) {} + FunctionAttrs() : CallGraphSCCPass(ID), AA(0) { + initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } // runOnSCC - Analyze the SCC, performing the transformation if possible. bool runOnSCC(CallGraphSCC &SCC); @@ -61,67 +63,25 @@ virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); CallGraphSCCPass::getAnalysisUsage(AU); } - bool PointsToLocalMemory(Value *V); + private: + AliasAnalysis *AA; }; } char FunctionAttrs::ID = 0; -INITIALIZE_PASS(FunctionAttrs, "functionattrs", - "Deduce function attributes", false, false); +INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } -/// PointsToLocalMemory - Returns whether the given pointer value points to -/// memory that is local to the function. Global constants are considered -/// local to all functions. -bool FunctionAttrs::PointsToLocalMemory(Value *V) { - SmallVector<Value*, 16> Worklist; - unsigned MaxLookup = 8; - - Worklist.push_back(V); - - do { - V = Worklist.pop_back_val()->getUnderlyingObject(); - - // An alloca instruction defines local memory. 
- if (isa<AllocaInst>(V)) - continue; - - // A global constant counts as local memory for our purposes. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { - if (!GV->isConstant()) - return false; - continue; - } - - // If both select values point to local memory, then so does the select. - if (SelectInst *SI = dyn_cast<SelectInst>(V)) { - Worklist.push_back(SI->getTrueValue()); - Worklist.push_back(SI->getFalseValue()); - continue; - } - - // If all values incoming to a phi node point to local memory, then so does - // the phi. - if (PHINode *PN = dyn_cast<PHINode>(V)) { - // Don't bother inspecting phi nodes with many operands. - if (PN->getNumIncomingValues() > MaxLookup) - return false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - Worklist.push_back(PN->getIncomingValue(i)); - continue; - } - - return false; - } while (!Worklist.empty() && --MaxLookup); - - return Worklist.empty(); -} - /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Function*, 8> SCCNodes; @@ -141,14 +101,15 @@ // External node - may write memory. Just give up. return false; - if (F->doesNotAccessMemory()) + AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F); + if (MRB == AliasAnalysis::DoesNotAccessMemory) // Already perfect! continue; // Definitions with weak linkage may be overridden at linktime with // something that writes memory, so treat them like declarations. if (F->isDeclaration() || F->mayBeOverridden()) { - if (!F->onlyReadsMemory()) + if (!AliasAnalysis::onlyReadsMemory(MRB)) // May write memory. Just give up. return false; @@ -163,32 +124,62 @@ // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. CallSite CS(cast<Value>(I)); - if (CS && CS.getCalledFunction()) { + if (CS) { // Ignore calls to functions in the same SCC. 
- if (SCCNodes.count(CS.getCalledFunction())) + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) continue; - // Ignore intrinsics that only access local memory. - if (unsigned id = CS.getCalledFunction()->getIntrinsicID()) - if (AliasAnalysis::getIntrinsicModRefBehavior(id) == - AliasAnalysis::AccessesArguments) { - // Check that all pointer arguments point to local memory. + AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS); + // If the call doesn't access arbitrary memory, we may be able to + // figure out something. + if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + // If the call does access argument pointees, check each argument. + if (AliasAnalysis::doesAccessArgPointees(MRB)) + // Check whether all pointer arguments point to local memory, and + // ignore calls that only access local memory. for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI) { Value *Arg = *CI; - if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg)) - // Writes memory. Just give up. - return false; + if (Arg->getType()->isPointerTy()) { + AliasAnalysis::Location Loc(Arg, + AliasAnalysis::UnknownSize, + I->getMetadata(LLVMContext::MD_tbaa)); + if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) { + if (MRB & AliasAnalysis::Mod) + // Writes non-local memory. Give up. + return false; + if (MRB & AliasAnalysis::Ref) + // Ok, it reads non-local memory. + ReadsMemory = true; + } + } } - // Only reads and writes local memory. - continue; - } - } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - // Ignore loads from local memory. - if (PointsToLocalMemory(LI->getPointerOperand())) continue; + } + // The call could access any memory. If that includes writes, give up. + if (MRB & AliasAnalysis::Mod) + return false; + // If it reads, note it. + if (MRB & AliasAnalysis::Ref) + ReadsMemory = true; + continue; + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + // Ignore non-volatile loads from local memory. 
(Atomic is okay here.) + if (!LI->isVolatile()) { + AliasAnalysis::Location Loc = AA->getLocation(LI); + if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - // Ignore stores to local memory. - if (PointsToLocalMemory(SI->getPointerOperand())) + // Ignore non-volatile stores to local memory. (Atomic is okay here.) + if (!SI->isVolatile()) { + AliasAnalysis::Location Loc = AA->getLocation(SI); + if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) { + // Ignore vaargs on local memory. + AliasAnalysis::Location Loc = AA->getLocation(VI); + if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } @@ -198,10 +189,6 @@ // Writes memory. Just give up. return false; - if (isMalloc(I)) - // malloc claims not to write memory! PR3754. - return false; - // If this instruction may read memory, remember that. ReadsMemory |= I->mayReadFromMemory(); } @@ -384,6 +371,8 @@ } bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { + AA = &getAnalysis<AliasAnalysis>(); + bool Changed = AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC);
diff --git a/src/LLVM/lib/Transforms/IPO/GlobalDCE.cpp b/src/LLVM/lib/Transforms/IPO/GlobalDCE.cpp index aa18601..2b427aa 100644 --- a/src/LLVM/lib/Transforms/IPO/GlobalDCE.cpp +++ b/src/LLVM/lib/Transforms/IPO/GlobalDCE.cpp
@@ -31,7 +31,9 @@ namespace { struct GlobalDCE : public ModulePass { static char ID; // Pass identification, replacement for typeid - GlobalDCE() : ModulePass(ID) {} + GlobalDCE() : ModulePass(ID) { + initializeGlobalDCEPass(*PassRegistry::getPassRegistry()); + } // run - Do the GlobalDCE pass on the specified module, optionally updating // the specified callgraph to reflect the changes. @@ -52,7 +54,7 @@ char GlobalDCE::ID = 0; INITIALIZE_PASS(GlobalDCE, "globaldce", - "Dead Global Elimination", false, false); + "Dead Global Elimination", false, false) ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
diff --git a/src/LLVM/lib/Transforms/IPO/GlobalOpt.cpp b/src/LLVM/lib/Transforms/IPO/GlobalOpt.cpp index a77af54..3552d03 100644 --- a/src/LLVM/lib/Transforms/IPO/GlobalOpt.cpp +++ b/src/LLVM/lib/Transforms/IPO/GlobalOpt.cpp
@@ -21,6 +21,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -40,6 +41,7 @@ using namespace llvm; STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); @@ -53,13 +55,17 @@ STATISTIC(NumNestRemoved , "Number of nest attributes removed"); STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); +STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); namespace { + struct GlobalStatus; struct GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - GlobalOpt() : ModulePass(ID) {} + GlobalOpt() : ModulePass(ID) { + initializeGlobalOptPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); @@ -69,13 +75,17 @@ bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); - bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, + const SmallPtrSet<const PHINode*, 16> &PHIUsers, + const GlobalStatus &GS); + bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); }; } char GlobalOpt::ID = 0; INITIALIZE_PASS(GlobalOpt, "globalopt", - "Global Variable Optimizer", false, false); + "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return 
new GlobalOpt(); } @@ -85,6 +95,9 @@ /// about it. If we find out that the address of the global is taken, none of /// this info will be accurate. struct GlobalStatus { + /// isCompared - True if the global's address is used in a comparison. + bool isCompared; + /// isLoaded - True if the global is ever loaded. If the global isn't ever /// loaded it can be deleted. bool isLoaded; @@ -129,10 +142,11 @@ /// HasPHIUser - Set to true if this global has a user that is a PHI node. bool HasPHIUser; - - GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), - AccessingFunction(0), HasMultipleAccessingFunctions(false), - HasNonInstructionUser(false), HasPHIUser(false) {} + + GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored), + StoredOnceValue(0), AccessingFunction(0), + HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), + HasPHIUser(false) {} }; } @@ -165,6 +179,11 @@ const User *U = *UI; if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { GS.HasNonInstructionUser = true; + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. + if (!isa<PointerType>(CE->getType())) return true; + if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(U)) { if (!GS.HasMultipleAccessingFunctions) { @@ -176,12 +195,14 @@ } if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { GS.isLoaded = true; - if (LI->isVolatile()) return true; // Don't hack on volatile loads. + // Don't hack on volatile/atomic loads. + if (!LI->isSimple()) return true; } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { // Don't allow a store OF the address, only stores TO the address. if (SI->getOperand(0) == V) return true; - if (SI->isVolatile()) return true; // Don't hack on volatile stores. + // Don't hack on volatile/atomic stores. 
+ if (!SI->isSimple()) return true; // If this is a direct store to the global (i.e., the global is a scalar // value, not an aggregate), keep more specific information about @@ -221,16 +242,16 @@ if (AnalyzeGlobal(I, GS, PHIUsers)) return true; GS.HasPHIUser = true; } else if (isa<CmpInst>(I)) { - // Nothing to analyse. - } else if (isa<MemTransferInst>(I)) { - const MemTransferInst *MTI = cast<MemTransferInst>(I); + GS.isCompared = true; + } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + if (MTI->isVolatile()) return true; if (MTI->getArgOperand(0) == V) GS.StoredType = GlobalStatus::isStored; if (MTI->getArgOperand(1) == V) GS.isLoaded = true; - } else if (isa<MemSetInst>(I)) { - assert(cast<MemSetInst>(I)->getArgOperand(0) == V && - "Memset only takes one pointer!"); + } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) { + assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); + if (MSI->isVolatile()) return true; GS.StoredType = GlobalStatus::isStored; } else { return true; // Any other non-load instruction might take address! 
@@ -262,18 +283,18 @@ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) { if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); } else if (isa<ConstantAggregateZero>(Agg)) { - if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { + if (StructType *STy = dyn_cast<StructType>(Agg->getType())) { if (IdxV < STy->getNumElements()) return Constant::getNullValue(STy->getElementType(IdxV)); - } else if (const SequentialType *STy = + } else if (SequentialType *STy = dyn_cast<SequentialType>(Agg->getType())) { return Constant::getNullValue(STy->getElementType()); } } else if (isa<UndefValue>(Agg)) { - if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { + if (StructType *STy = dyn_cast<StructType>(Agg->getType())) { if (IdxV < STy->getNumElements()) return UndefValue::get(STy->getElementType(IdxV)); - } else if (const SequentialType *STy = + } else if (SequentialType *STy = dyn_cast<SequentialType>(Agg->getType())) { return UndefValue::get(STy->getElementType()); } @@ -308,7 +329,7 @@ if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); Changed |= CleanupConstantGlobalUsers(CE, SubInit); - } else if (CE->getOpcode() == Instruction::BitCast && + } else if (CE->getOpcode() == Instruction::BitCast && CE->getType()->isPointerTy()) { // Pointer cast, delete any stores and memsets to the global. Changed |= CleanupConstantGlobalUsers(CE, 0); @@ -324,7 +345,7 @@ // and will invalidate our notion of what Init is. Constant *SubInit = 0; if (!isa<ConstantExpr>(GEP->getOperand(0))) { - ConstantExpr *CE = + ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -361,7 +382,7 @@ // We might have a dead and dangling constant hanging off of here. 
if (Constant *C = dyn_cast<Constant>(V)) return SafeToDestroyConstant(C); - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; @@ -371,15 +392,15 @@ // Stores *to* the pointer are ok. if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getOperand(0) != V; - + // Otherwise, it must be a GEP. GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); if (GEPI == 0) return false; - + if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || !cast<Constant>(GEPI->getOperand(1))->isNullValue()) return false; - + for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); I != E; ++I) if (!isSafeSROAElementUse(*I)) @@ -393,11 +414,11 @@ /// static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // The user of the global must be a GEP Inst or a ConstantExpr GEP. - if (!isa<GetElementPtrInst>(U) && - (!isa<ConstantExpr>(U) || + if (!isa<GetElementPtrInst>(U) && + (!isa<ConstantExpr>(U) || cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) return false; - + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we // don't like < 3 operand CE's, and we don't like non-constant integer // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some @@ -409,18 +430,18 @@ gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); ++GEPI; // Skip over the pointer index. - + // If this is a use of an array allocation, do a bit more checking for sanity. - if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { + if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { uint64_t NumElements = AT->getNumElements(); ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); - + // Check to make sure that index falls within the array. If not, // something funny is going on, so we won't do the optimization. // if (Idx->getZExtValue() >= NumElements) return false; - + // We cannot scalar repl this level of the array unless any array // sub-indices are in-range constants. 
In particular, consider: // A[0][i]. We cannot know that the user isn't doing invalid things like @@ -432,16 +453,16 @@ GEPI != E; ++GEPI) { uint64_t NumElements; - if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) + if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) NumElements = SubArrayTy->getNumElements(); - else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) + else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) NumElements = SubVectorTy->getNumElements(); else { assert((*GEPI)->isStructTy() && "Indexed GEP type is not array, vector, or struct!"); continue; } - + ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); if (!IdxVal || IdxVal->getZExtValue() >= NumElements) return false; @@ -465,7 +486,7 @@ } return true; } - + /// SRAGlobal - Perform scalar replacement of aggregates on the specified global /// variable. This opens the door for other optimizations by exposing the @@ -476,10 +497,10 @@ // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) return 0; - + assert(GV->hasLocalLinkage() && !GV->isConstant()); Constant *Init = GV->getInitializer(); - const Type *Ty = Init->getType(); + Type *Ty = Init->getType(); std::vector<GlobalVariable*> NewGlobals; Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); @@ -488,8 +509,8 @@ unsigned StartAlignment = GV->getAlignment(); if (StartAlignment == 0) StartAlignment = TD.getABITypeAlignment(GV->getType()); - - if (const StructType *STy = dyn_cast<StructType>(Ty)) { + + if (StructType *STy = dyn_cast<StructType>(Ty)) { NewGlobals.reserve(STy->getNumElements()); const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { @@ -503,7 +524,7 @@ GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. 
If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. @@ -512,9 +533,9 @@ if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) NGV->setAlignment(NewAlign); } - } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) { + } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) { unsigned NumElements = 0; - if (const ArrayType *ATy = dyn_cast<ArrayType>(STy)) + if (ArrayType *ATy = dyn_cast<ArrayType>(STy)) NumElements = ATy->getNumElements(); else NumElements = cast<VectorType>(STy)->getNumElements(); @@ -522,7 +543,7 @@ if (NumElements > 16 && GV->hasNUsesOrMore(16)) return 0; // It's not worth it. NewGlobals.reserve(NumElements); - + uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { @@ -537,7 +558,7 @@ GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); - + // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. 
@@ -549,7 +570,7 @@ if (NewGlobals.empty()) return 0; - + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -577,15 +598,14 @@ Idxs.push_back(NullInt); for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) Idxs.push_back(CE->getOperand(i)); - NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), - &Idxs[0], Idxs.size()); + NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs); } else { GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); SmallVector<Value*, 8> Idxs; Idxs.push_back(NullInt); for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) Idxs.push_back(GEPI->getOperand(i)); - NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), + NewPtr = GetElementPtrInst::Create(NewPtr, Idxs, GEPI->getName()+"."+Twine(Val),GEPI); } } @@ -615,7 +635,7 @@ } /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified -/// value will trap if the value is dynamically null. PHIs keeps track of any +/// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSet<const PHINode*, 8> &PHIs) { @@ -734,8 +754,7 @@ break; if (Idxs.size() == GEPI->getNumOperands()-1) Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, - ConstantExpr::getGetElementPtr(NewV, &Idxs[0], - Idxs.size())); + ConstantExpr::getGetElementPtr(NewV, Idxs)); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -757,7 +776,7 @@ // Keep track of whether we are able to remove all the uses of the global // other than the store that defines it. bool AllNonStoreUsesGone = true; - + // Replace all uses of loads with uses of uses of the stored value. 
for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ User *GlobalUser = *GUI++; @@ -780,7 +799,8 @@ // If we get here we could have other crazy uses that are transitively // loaded. assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || - isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!"); + isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) && + "Only expect load and stores!"); } } @@ -826,12 +846,12 @@ /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, - const Type *AllocTy, + Type *AllocTy, ConstantInt *NElements, TargetData* TD) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - - const Type *GlobalType; + + Type *GlobalType; if (NElements->getZExtValue() == 1) GlobalType = AllocTy; else @@ -840,14 +860,14 @@ // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, UndefValue::get(GlobalType), GV->getName()+".body", GV, GV->isThreadLocal()); - + // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update // other users to use the global as well. 
@@ -867,10 +887,10 @@ User->replaceUsesOfWith(CI, TheBC); } } - + Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) - RepValue = ConstantExpr::getBitCast(RepValue, + RepValue = ConstantExpr::getBitCast(RepValue, GV->getType()->getElementType()); // If there is a comparison against null, we will insert a global bool to @@ -890,7 +910,7 @@ SI->eraseFromParent(); continue; } - + LoadInst *LI = cast<LoadInst>(GV->use_back()); while (!LI->use_empty()) { Use &LoadUse = LI->use_begin().getUse(); @@ -898,7 +918,7 @@ LoadUse = RepValue; continue; } - + ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); // Replace the cmp X, 0 with a use of the bool value. Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); @@ -963,20 +983,20 @@ if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { continue; // Fine, ignore. } - + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->getOperand(0) == V && SI->getOperand(1) != GV) return false; // Storing the pointer itself... bad. continue; // Otherwise, storing through it, or storing into GV... fine. } - + // Must index into the array and into the struct. if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) return false; continue; } - + if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. @@ -985,13 +1005,13 @@ return false; continue; } - + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) return false; continue; } - + return false; } return true; @@ -1000,9 +1020,9 @@ /// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV /// somewhere. Transform all uses of the allocation into loads from the /// global and uses of the resultant pointer. Further, delete the store into -/// GV. This assumes that these value pass the +/// GV. 
This assumes that these value pass the /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. -static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV) { while (!Alloc->use_empty()) { Instruction *U = cast<Instruction>(*Alloc->use_begin()); @@ -1035,7 +1055,7 @@ continue; } } - + // Insert a load from the global, and use it instead of the malloc. Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); U->replaceUsesOfWith(Alloc, NL); @@ -1053,24 +1073,24 @@ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { const Instruction *User = cast<Instruction>(*UI); - + // Comparison against null is ok. if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) return false; continue; } - + // getelementptr is also ok, but only a simple form. if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { // Must index into the array and into the struct. if (GEPI->getNumOperands() < 3) return false; - + // Otherwise the GEP is ok. continue; } - + if (const PHINode *PN = dyn_cast<PHINode>(User)) { if (!LoadUsingPHIsPerLoad.insert(PN)) // This means some phi nodes are dependent on each other. @@ -1079,19 +1099,19 @@ if (!LoadUsingPHIs.insert(PN)) // If we have already analyzed this PHI, then it is safe. continue; - + // Make sure all uses of the PHI are simple enough to transform. if (!LoadUsesSimpleEnoughForHeapSRA(PN, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; - + continue; } - + // Otherwise we don't know what this is, not ok. return false; } - + return true; } @@ -1110,10 +1130,10 @@ return false; LoadUsingPHIsPerLoad.clear(); } - + // If we reach here, we know that all uses of the loads and transitive uses // (through PHI nodes) are simple enough to transform. However, we don't know - // that all inputs the to the PHI nodes are in the same equivalence sets. 
+ // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin() @@ -1121,29 +1141,29 @@ const PHINode *PN = *I; for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); - + // PHI of the stored value itself is ok. if (InVal == StoredVal) continue; - + if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) { // One of the PHIs in our set is (optimistically) ok. if (LoadUsingPHIs.count(InPN)) continue; return false; } - + // Load from GV is ok. if (const LoadInst *LI = dyn_cast<LoadInst>(InVal)) if (LI->getOperand(0) == GV) continue; - + // UNDEF? NULL? - + // Anything else is rejected. return false; } } - + return true; } @@ -1151,15 +1171,15 @@ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; - + if (FieldNo >= FieldVals.size()) FieldVals.resize(FieldNo+1); - + // If we already have this value, just reuse the previously scalarized // version. if (Value *FieldVal = FieldVals[FieldNo]) return FieldVal; - + // Depending on what instruction this is, we have several cases. Value *Result; if (LoadInst *LI = dyn_cast<LoadInst>(V)) { @@ -1172,24 +1192,26 @@ } else if (PHINode *PN = dyn_cast<PHINode>(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. 
- const StructType *ST = + StructType *ST = cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); - - Result = + + PHINode *NewPN = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getNumIncomingValues(), PN->getName()+".f"+Twine(FieldNo), PN); + Result = NewPN; PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); } else { llvm_unreachable("Unknown usable value"); Result = 0; } - + return FieldVals[FieldNo] = Result; } /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from /// the load, rewrite the derived value to use the HeapSRoA'd load. -static void RewriteHeapSROALoadUser(Instruction *LoadUser, +static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { // If this is a comparison against null, handle it. @@ -1199,32 +1221,31 @@ // field. Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, InsertedScalarizedValues, PHIsToRewrite); - + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, - Constant::getNullValue(NPtr->getType()), + Constant::getNullValue(NPtr->getType()), SCI->getName()); SCI->replaceAllUsesWith(New); SCI->eraseFromParent(); return; } - + // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) && "Unexpected GEPI!"); - + // Load the pointer for this field. unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, InsertedScalarizedValues, PHIsToRewrite); - + // Create the new GEP idx vector. 
SmallVector<Value*, 8> GEPIdx; GEPIdx.push_back(GEPI->getOperand(1)); GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - - Value *NGEPI = GetElementPtrInst::Create(NewPtr, - GEPIdx.begin(), GEPIdx.end(), + + Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx, GEPI->getName(), GEPI); GEPI->replaceAllUsesWith(NGEPI); GEPI->eraseFromParent(); @@ -1238,12 +1259,10 @@ // already been seen first by another load, so its uses have already been // processed. PHINode *PN = cast<PHINode>(LoadUser); - bool Inserted; - DenseMap<Value*, std::vector<Value*> >::iterator InsertPos; - tie(InsertPos, Inserted) = - InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>())); - if (!Inserted) return; - + if (!InsertedScalarizedValues.insert(std::make_pair(PN, + std::vector<Value*>())).second) + return; + // If this is the first time we've seen this PHI, recursively process all // users. for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { @@ -1256,7 +1275,7 @@ /// is a value loaded from the global. Eliminate all uses of Ptr, making them /// use FieldGlobals instead. All uses of loaded values satisfy /// AllGlobalLoadUsesSimpleEnoughForHeapSRA. 
-static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); @@ -1264,7 +1283,7 @@ Instruction *User = cast<Instruction>(*UI++); RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } - + if (Load->use_empty()) { Load->eraseFromParent(); InsertedScalarizedValues.erase(Load); @@ -1276,8 +1295,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value* NElems, TargetData *TD) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); - const Type* MAT = getMallocAllocatedType(CI); - const StructType *STy = cast<StructType>(MAT); + Type* MAT = getMallocAllocatedType(CI); + StructType *STy = cast<StructType>(MAT); // There is guaranteed to be at least one use of the malloc (storing // it into GV). If there are other uses, change them to be uses of @@ -1289,11 +1308,11 @@ // new mallocs at the same place as CI, and N globals. 
std::vector<Value*> FieldGlobals; std::vector<Value*> FieldMallocs; - + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ - const Type *FieldTy = STy->getElementType(FieldNo); - const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); - + Type *FieldTy = STy->getElementType(FieldNo); + PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + GlobalVariable *NGV = new GlobalVariable(*GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, @@ -1301,11 +1320,11 @@ GV->getName() + ".f" + Twine(FieldNo), GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - + unsigned TypeSize = TD->getTypeAllocSize(FieldTy); - if (const StructType *ST = dyn_cast<StructType>(FieldTy)) + if (StructType *ST = dyn_cast<StructType>(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), NElems, 0, @@ -1313,7 +1332,7 @@ FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); } - + // The tricky aspect of this transformation is handling the case when malloc // fails. In the original code, malloc failing would set the result pointer // of malloc to null. In this case, some mallocs could succeed and others @@ -1340,25 +1359,24 @@ // Split the basic block at the old malloc. BasicBlock *OrigBB = CI->getParent(); BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); - + // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), "malloc_ret_null", OrigBB->getParent()); - + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond // branch on RunningOr. 
OrigBB->getTerminator()->eraseFromParent(); BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); - + // Within the NullPtrBlock, we need to emit a comparison and branch for each // pointer, because some may be null while others are not. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, - Constant::getNullValue(GVVal->getType()), - "tmp"); + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Constant::getNullValue(GVVal->getType())); BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", OrigBB->getParent()); BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", @@ -1371,10 +1389,10 @@ new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], FreeBlock); BranchInst::Create(NextBlock, FreeBlock); - + NullPtrBlock = NextBlock; } - + BranchInst::Create(ContBB, NullPtrBlock); // CI is no longer needed, remove it. @@ -1385,28 +1403,28 @@ /// inserted for a given load. DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; InsertedScalarizedValues[GV] = FieldGlobals; - + std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; - + // Okay, the malloc site is completely handled. All of the uses of GV are now // loads, and all uses of those loads are simple. Rewrite them to use loads // of the per-field globals instead. for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { Instruction *User = cast<Instruction>(*UI++); - + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); continue; } - + // Must be a store of null. StoreInst *SI = cast<StoreInst>(User); assert(isa<ConstantPointerNull>(SI->getOperand(0)) && "Unexpected heap-sra user!"); - + // Insert a store of null into each global. 
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); + PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); Constant *Null = Constant::getNullValue(PT->getElementType()); new StoreInst(Null, FieldGlobals[i], SI); } @@ -1430,7 +1448,7 @@ FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); } } - + // Drop all inter-phi links and any loads that made it this far. for (DenseMap<Value*, std::vector<Value*> >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1440,7 +1458,7 @@ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) LI->dropAllReferences(); } - + // Delete all the phis and loads now that inter-references are dead. for (DenseMap<Value*, std::vector<Value*> >::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); @@ -1450,7 +1468,7 @@ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) LI->eraseFromParent(); } - + // The old global is now dead, remove it. GV->eraseFromParent(); @@ -1463,12 +1481,12 @@ /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, - const Type *AllocTy, + Type *AllocTy, Module::global_iterator &GVI, TargetData *TD) { if (!TD) return false; - + // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1508,7 +1526,7 @@ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); return true; } - + // If the allocation is an array of structures, consider transforming this // into multiple malloc'd arrays, one for each field. This is basically // SRoA for malloc'd memory. @@ -1516,10 +1534,10 @@ // If this is an allocation of a fixed size array of structs, analyze as a // variable size array. 
malloc [100 x struct],1 -> malloc struct, 100 if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) + if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) AllocTy = AT->getElementType(); - const StructType *AllocSTy = dyn_cast<StructType>(AllocTy); + StructType *AllocSTy = dyn_cast<StructType>(AllocTy); if (!AllocSTy) return false; @@ -1530,8 +1548,8 @@ // If this is a fixed size array, transform the Malloc to be an alloc of // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); @@ -1544,13 +1562,13 @@ CI = dyn_cast<BitCastInst>(Malloc) ? extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); } - + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); return true; } - + return false; -} +} // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge // that only one value (besides its initializer) is ever stored to the global. @@ -1568,15 +1586,14 @@ GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) - SOVC = - ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); + SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. 
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - const Type* MallocType = getMallocAllocatedType(CI); - if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + Type* MallocType = getMallocAllocatedType(CI); + if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, GVI, TD)) return true; } @@ -1590,8 +1607,8 @@ /// can shrink the global into a boolean and select between the two values /// whenever it is used. This exposes the values to other scalar optimizations. static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { - const Type *GVElType = GV->getType()->getElementType(); - + Type *GVElType = GV->getType()->getElementType(); + // If GVElType is already i1, it is already shrunk. If the type of the GV is // an FP value, pointer or vector, don't do this optimization because a select // between them is very expensive and unlikely to lead to later @@ -1611,11 +1628,11 @@ } DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); - + // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, - GlobalValue::InternalLinkage, + GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", GV->isThreadLocal()); @@ -1684,10 +1701,12 @@ /// ProcessInternalGlobal - Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, - Module::global_iterator &GVI) { - SmallPtrSet<const PHINode*, 16> PHIUsers; - GlobalStatus GS; +bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, + Module::global_iterator &GVI) { + if (!GV->hasLocalLinkage()) + return false; + + // Do more involved optimizations if the global is internal. 
GV->removeDeadConstantUsers(); if (GV->use_empty()) { @@ -1697,140 +1716,139 @@ return true; } - if (!AnalyzeGlobal(GV, GS, PHIUsers)) { -#if 0 - DEBUG(dbgs() << "Global: " << *GV); - DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); - DEBUG(dbgs() << " StoredType = "); - switch (GS.StoredType) { - case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; - case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); - break; - case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; - case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; - } - if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) - DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); - if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - DEBUG(dbgs() << " AccessingFunction = " - << GS.AccessingFunction->getName() << "\n"); - DEBUG(dbgs() << " HasMultipleAccessingFunctions = " - << GS.HasMultipleAccessingFunctions << "\n"); - DEBUG(dbgs() << " HasNonInstructionUser = " - << GS.HasNonInstructionUser<<"\n"); - DEBUG(dbgs() << "\n"); -#endif - - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive we can make - // this global a local variable) we replace the global with a local alloca - // in this function. - // - // NOTE: It doesn't make sense to promote non single-value types since we - // are just replacing static memory to stack memory. - // - // If the global is in different address space, don't bring it to stack. 
- if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && - GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); - Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction - ->getEntryBlock().begin()); - const Type* ElemTy = GV->getType()->getElementType(); - // FIXME: Pass Global's alignment when globals have alignment - AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); - if (!isa<UndefValue>(GV->getInitializer())) - new StoreInst(GV->getInitializer(), Alloca, &FirstI); + SmallPtrSet<const PHINode*, 16> PHIUsers; + GlobalStatus GS; - GV->replaceAllUsesWith(Alloca); - GV->eraseFromParent(); - ++NumLocalized; - return true; - } - - // If the global is never loaded (but may be stored to), it is dead. - // Delete it now. - if (!GS.isLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + if (AnalyzeGlobal(GV, GS, PHIUsers)) + return false; - // Delete any stores we can find to the global. We may not be able to - // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - // If the global is dead now, delete it. - if (GV->use_empty()) { - GV->eraseFromParent(); - ++NumDeleted; - Changed = true; - } - return Changed; - - } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); - GV->setConstant(true); - - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - // If the global is dead now, just nuke it. 
- if (GV->use_empty()) { - DEBUG(dbgs() << " *** Marking constant allowed us to simplify " - << "all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; - } - - ++NumMarked; - return true; - } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } - } else if (GS.StoredType == GlobalStatus::isStoredOnce) { - // If the initial value for the global was an undef value, and if only - // one other value was stored into it, we can just change the - // initializer to be the stored value, then delete all stores to the - // global. This allows us to mark it constant. - if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) - if (isa<UndefValue>(GV->getInitializer())) { - // Change the initial value here. - GV->setInitializer(SOVConstant); - - // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); - - if (GV->use_empty()) { - DEBUG(dbgs() << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"); - GV->eraseFromParent(); - ++NumDeleted; - } else { - GVI = GV; - } - ++NumSubstitute; - return true; - } - - // Try to optimize globals based on the knowledge that only one value - // (besides its initializer) is ever stored to the global. - if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, - getAnalysisIfAvailable<TargetData>())) - return true; - - // Otherwise, if the global was not a boolean, we can shrink it to be a - // boolean. 
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { - ++NumShrunkToBool; - return true; - } - } + if (!GS.isCompared && !GV->hasUnnamedAddr()) { + GV->setUnnamedAddr(true); + NumUnnamed++; } + + if (GV->isConstant() || !GV->hasInitializer()) + return false; + + return ProcessInternalGlobal(GV, GVI, PHIUsers, GS); +} + +/// ProcessInternalGlobal - Analyze the specified global variable and optimize +/// it if possible. If we make a change, return true. +bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, + Module::global_iterator &GVI, + const SmallPtrSet<const PHINode*, 16> &PHIUsers, + const GlobalStatus &GS) { + // If this is a first class global and has only one accessing function + // and this function is main (which we know is not recursive we can make + // this global a local variable) we replace the global with a local alloca + // in this function. + // + // NOTE: It doesn't make sense to promote non single-value types since we + // are just replacing static memory to stack memory. + // + // If the global is in different address space, don't bring it to stack. 
+ if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && !GS.HasNonInstructionUser && + GV->getType()->getElementType()->isSingleValueType() && + GS.AccessingFunction->getName() == "main" && + GS.AccessingFunction->hasExternalLinkage() && + GV->getType()->getAddressSpace() == 0) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction + ->getEntryBlock().begin()); + Type* ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); + if (!isa<UndefValue>(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; + } + + // If the global is never loaded (but may be stored to), it is dead. + // Delete it now. + if (!GS.isLoaded) { + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + // If the global is dead now, delete it. + if (GV->use_empty()) { + GV->eraseFromParent(); + ++NumDeleted; + Changed = true; + } + return Changed; + + } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); + GV->setConstant(true); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + // If the global is dead now, just nuke it. 
+ if (GV->use_empty()) { + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + + ++NumMarked; + return true; + } else if (!GV->getInitializer()->getType()->isSingleValueType()) { + if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; + } + } else if (GS.StoredType == GlobalStatus::isStoredOnce) { + // If the initial value for the global was an undef value, and if only + // one other value was stored into it, we can just change the + // initializer to be the stored value, then delete all stores to the + // global. This allows us to mark it constant. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (isa<UndefValue>(GV->getInitializer())) { + // Change the initial value here. + GV->setInitializer(SOVConstant); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } else { + GVI = GV; + } + ++NumSubstitute; + return true; + } + + // Try to optimize globals based on the knowledge that only one value + // (besides its initializer) is ever stored to the global. + if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, + getAnalysisIfAvailable<TargetData>())) + return true; + + // Otherwise, if the global was not a boolean, we can shrink it to be a + // boolean. 
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + ++NumShrunkToBool; + return true; + } + } + return false; } @@ -1917,62 +1935,51 @@ if (New && New != CE) GV->setInitializer(New); } - // Do more involved optimizations if the global is internal. - if (!GV->isConstant() && GV->hasLocalLinkage() && - GV->hasInitializer()) - Changed |= ProcessInternalGlobal(GV, GVI); + + Changed |= ProcessGlobal(GV, GVI); } return Changed; } -/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all +/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all /// initializers have an init priority of 65535. GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (I->getName() == "llvm.global_ctors") { - // Found it, verify it's an array of { int, void()* }. - const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType()); - if (!ATy) return 0; - const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); - if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isIntegerTy(32)) return 0; - const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); - if (!PFTy) return 0; - const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); - if (!FTy || !FTy->getReturnType()->isVoidTy() || - FTy->isVarArg() || FTy->getNumParams() != 0) - return 0; - - // Verify that the initializer is simple enough for us to handle. 
- if (!I->hasDefinitiveInitializer()) return 0; - ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); - if (!CA) return 0; - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) { - if (isa<ConstantPointerNull>(CS->getOperand(1))) - continue; + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (GV == 0) return 0; + + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. + if (!GV->hasUniqueInitializer()) return 0; - // Must have a function or null ptr. - if (!isa<Function>(CS->getOperand(1))) - return 0; - - // Init priority must be standard. - ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); - if (!CI || CI->getZExtValue() != 65535) - return 0; - } else { - return 0; - } - - return I; - } - return 0; + if (isa<ConstantAggregateZero>(GV->getInitializer())) + return GV; + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + if (isa<ConstantAggregateZero>(*i)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(*i); + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa<Function>(CS->getOperand(1))) + return 0; + + // Init priority must be standard. + ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) + return 0; + } + + return GV; } /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, /// return a list of the functions and null terminator as a vector. 
static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector<Function*>(); ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); std::vector<Function*> Result; Result.reserve(CA->getNumOperands()); @@ -1985,48 +1992,50 @@ /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the /// specified array, returning the new global to use. -static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, +static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, const std::vector<Function*> &Ctors) { // If we made a change, reassemble the initializer list. - std::vector<Constant*> CSVals; - CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); - CSVals.push_back(0); - + Constant *CSVals[2]; + CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535); + CSVals[1] = 0; + + StructType *StructTy = + cast <StructType>( + cast<ArrayType>(GCL->getType()->getElementType())->getElementType()); + // Create the new init list. std::vector<Constant*> CAList; for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { if (Ctors[i]) { CSVals[1] = Ctors[i]; } else { - const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), + Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), false); - const PointerType *PFTy = PointerType::getUnqual(FTy); + PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), - 2147483647); + 0x7fffffff); } - CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); + CAList.push_back(ConstantStruct::get(StructTy, CSVals)); } - + // Create the array initializer. 
- const Type *StructTy = - cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList); - + // If we didn't change the number of elements, don't create a new GV. if (CA->getType() == GCL->getInitializer()->getType()) { GCL->setInitializer(CA); return GCL; } - + // Create the new global and insert it next to the existing list. GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->isThreadLocal()); GCL->getParent()->getGlobalList().insert(GCL, NGV); NGV->takeName(GCL); - + // Nuke the old list, replacing any uses with the new one. if (!GCL->use_empty()) { Constant *V = NGV; @@ -2035,7 +2044,7 @@ GCL->replaceAllUsesWith(V); } GCL->eraseFromParent(); - + if (Ctors.size()) return NGV; else @@ -2043,17 +2052,86 @@ } -static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, - Value *V) { +static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) { if (Constant *CV = dyn_cast<Constant>(V)) return CV; Constant *R = ComputedValues[V]; assert(R && "Reference to an uncomputed value!"); return R; } +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet<Constant*, 8> &SimpleConstants); + + +/// isSimpleEnoughValueToCommit - Return true if the specified constant can be +/// handled by the code generator. We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSet<Constant*, 8> &SimpleConstants) { + // Simple integer, undef, constant aggregate zero, global addresses, etc are + // all supported. 
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C) || + isa<GlobalValue>(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || + isa<ConstantVector>(C)) { + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + Constant *Op = cast<Constant>(C->getOperand(i)); + if (!isSimpleEnoughValueToCommit(Op, SimpleConstants)) + return false; + } + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast<ConstantExpr>(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // These casts are always fine if the casted value is. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + + // GEP is fine if it is simple + constant offset. + case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + + case Instruction::Add: + // We allow simple+cst. + if (!isa<ConstantInt>(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSet<Constant*, 8> &SimpleConstants) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C)) return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants); +} + + /// isSimpleEnoughPointerToCommit - Return true if this constant is simple -/// enough for us to understand. In particular, if it is a cast of something, -/// we punt. We basically just support direct accesses to globals and GEP's of +/// enough for us to understand. 
In particular, if it is a cast to anything +/// other than from one pointer type to another pointer type, we punt. +/// We basically just support direct accesses to globals and GEP's of /// globals. This should be kept up to date with CommitValueTo. static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't @@ -2062,19 +2140,19 @@ return false; if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - return GV->hasDefinitiveInitializer(); + return GV->hasUniqueInitializer(); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { // Handle a constantexpr gep. if (CE->getOpcode() == Instruction::GetElementPtr && isa<GlobalVariable>(CE->getOperand(0)) && cast<GEPOperator>(CE)->isInBounds()) { GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - // Do not allow weak/linkonce/dllimport/dllexport linkage or + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or // external globals. - if (!GV->hasDefinitiveInitializer()) + if (!GV->hasUniqueInitializer()) return false; // The first index must be zero. @@ -2087,7 +2165,18 @@ return false; return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. + } else if (CE->getOpcode() == Instruction::BitCast && + isa<GlobalVariable>(CE->getOperand(0))) { + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. 
+ return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); } + } + return false; } @@ -2101,9 +2190,9 @@ assert(Val->getType() == Init->getType() && "Type mismatch!"); return Val; } - + std::vector<Constant*> Elts; - if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { + if (StructType *STy = dyn_cast<StructType>(Init->getType())) { // Break up the constant into its elements. if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { @@ -2119,51 +2208,48 @@ llvm_unreachable("This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); } - + // Replace the element that we are supposed to. ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); unsigned Idx = CU->getZExtValue(); assert(Idx < STy->getNumElements() && "Struct index out of range!"); Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); - - // Return the modified struct. - return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), - STy->isPacked()); - } else { - ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); - const SequentialType *InitTy = cast<SequentialType>(Init->getType()); - uint64_t NumElts; - if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) - NumElts = ATy->getNumElements(); - else - NumElts = cast<VectorType>(InitTy)->getNumElements(); - - - // Break up the array into elements. 
- if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - Elts.push_back(cast<Constant>(*i)); - } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) { - for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) - Elts.push_back(cast<Constant>(*i)); - } else if (isa<ConstantAggregateZero>(Init)) { - Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); - } else { - assert(isa<UndefValue>(Init) && "This code is out of sync with " - " ConstantFoldLoadThroughGEPConstantExpr"); - Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); - } - - assert(CI->getZExtValue() < NumElts); - Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - - if (Init->getType()->isArrayTy()) - return ConstantArray::get(cast<ArrayType>(InitTy), Elts); - else - return ConstantVector::get(&Elts[0], Elts.size()); - } + // Return the modified struct. + return ConstantStruct::get(STy, Elts); + } + + ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); + SequentialType *InitTy = cast<SequentialType>(Init->getType()); + + uint64_t NumElts; + if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = cast<VectorType>(InitTy)->getNumElements(); + + // Break up the array into elements. 
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) + Elts.push_back(cast<Constant>(*i)); + } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) { + for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) + Elts.push_back(cast<Constant>(*i)); + } else if (isa<ConstantAggregateZero>(Init)) { + Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); + } else { + assert(isa<UndefValue>(Init) && "This code is out of sync with " + " ConstantFoldLoadThroughGEPConstantExpr"); + Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); + } + + assert(CI->getZExtValue() < NumElts); + Elts[CI->getZExtValue()] = + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + + if (Init->getType()->isArrayTy()) + return ConstantArray::get(cast<ArrayType>(InitTy), Elts); + return ConstantVector::get(Elts); } /// CommitValueTo - We have decided that Addr (which satisfies the predicate @@ -2189,14 +2275,14 @@ // is the most up-to-date. DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); if (I != Memory.end()) return I->second; - + // Access it. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { if (GV->hasDefinitiveInitializer()) return GV->getInitializer(); return 0; } - + // Handle a constantexpr getelementptr. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) if (CE->getOpcode() == Instruction::GetElementPtr && @@ -2216,17 +2302,19 @@ const SmallVectorImpl<Constant*> &ActualArgs, std::vector<Function*> &CallStack, DenseMap<Constant*, Constant*> &MutatedMemory, - std::vector<GlobalVariable*> &AllocaTmps) { + std::vector<GlobalVariable*> &AllocaTmps, + SmallPtrSet<Constant*, 8> &SimpleConstants, + const TargetData *TD) { // Check to see if this function is already executing (recursion). If so, // bail out. TODO: we might want to accept limited recursion. 
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) return false; - + CallStack.push_back(F); - + /// Values - As we compute SSA register values, we store their contents here. DenseMap<Value*, Constant*> Values; - + // Initialize arguments to the incoming values specified. unsigned ArgNo = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; @@ -2237,21 +2325,65 @@ /// we can only evaluate any one basic block at most once. This set keeps /// track of what we have executed so we can detect recursive cases etc. SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; - + // CurInst - The current instruction we're evaluating. BasicBlock::iterator CurInst = F->begin()->begin(); - + // This is the main evaluation loop. while (1) { Constant *InstResult = 0; - + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { - if (SI->isVolatile()) return false; // no volatile accesses. + if (!SI->isSimple()) return false; // no volatile/atomic accesses. Constant *Ptr = getVal(Values, SI->getOperand(1)); if (!isSimpleEnoughPointerToCommit(Ptr)) // If this is too complex for us to commit, reject it. return false; + Constant *Val = getVal(Values, SI->getOperand(0)); + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants)) + return false; + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + if (CE->getOpcode() == Instruction::BitCast) { + // If we're evaluating a store through a bitcast, then we need + // to pull the bitcast off the pointer type and push it onto the + // stored value. + Ptr = CE->getOperand(0); + + Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType(); + + // In order to push the bitcast onto the stored value, a bitcast + // from NewTy to Val's type must be legal. If it's not, we can try + // introspecting NewTy to find a legal conversion. 
+ while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + // If NewTy is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + if (StructType *STy = dyn_cast<StructType>(NewTy)) { + NewTy = STy->getTypeAtIndex(0U); + + IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant * const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList); + + // If we can't improve the situation by introspecting NewTy, + // we have to give up. + } else { + return 0; + } + } + + // If we found compatible types, go ahead and push the bitcast + // onto the stored value. + Val = ConstantExpr::getBitCast(Val, NewTy); + } + MutatedMemory[Ptr] = Val; } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), @@ -2275,22 +2407,22 @@ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; ++i) GEPOps.push_back(getVal(Values, *i)); - InstResult = cast<GEPOperator>(GEP)->isInBounds() ? - ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : - ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); + InstResult = + ConstantExpr::getGetElementPtr(P, GEPOps, + cast<GEPOperator>(GEP)->isInBounds()); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { - if (LI->isVolatile()) return false; // no volatile accesses. + if (!LI->isSimple()) return false; // no volatile/atomic accesses. InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), MutatedMemory); if (InstResult == 0) return false; // Could not evaluate load. } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. 
- const Type *Ty = AI->getType()->getElementType(); + Type *Ty = AI->getType()->getElementType(); AllocaTmps.push_back(new GlobalVariable(Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); - InstResult = AllocaTmps.back(); + InstResult = AllocaTmps.back(); } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { // Debug info can safely be ignored here. @@ -2302,6 +2434,20 @@ // Cannot handle inline asm. if (isa<InlineAsm>(CI->getCalledValue())) return false; + if (MemSetInst *MSI = dyn_cast<MemSetInst>(CI)) { + if (MSI->isVolatile()) return false; + Constant *Ptr = getVal(Values, MSI->getDest()); + Constant *Val = getVal(Values, MSI->getValue()); + Constant *DestVal = ComputeLoadResult(getVal(Values, Ptr), + MutatedMemory); + if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { + // This memset is a no-op. + ++CurInst; + continue; + } + return false; + } + // Resolve function pointers. Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue())); @@ -2315,8 +2461,7 @@ if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, Formals.data(), - Formals.size())) { + if (Constant *C = ConstantFoldCall(Callee, Formals)) { InstResult = C; } else { return false; @@ -2324,11 +2469,11 @@ } else { if (Callee->getFunctionType()->isVarArg()) return false; - + Constant *RetVal; // Execute the call, if successful, use the return value. if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, - MutatedMemory, AllocaTmps)) + MutatedMemory, AllocaTmps, SimpleConstants, TD)) return false; InstResult = RetVal; } @@ -2342,7 +2487,7 @@ dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); if (!Cond) return false; // Cannot determine. 
- NewBB = BI->getSuccessor(!Cond->getZExtValue()); + NewBB = BI->getSuccessor(!Cond->getZExtValue()); } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { ConstantInt *Val = @@ -2358,20 +2503,20 @@ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { if (RI->getNumOperands()) RetVal = getVal(Values, RI->getOperand(0)); - + CallStack.pop_back(); // return from fn. return true; // We succeeded at evaluating this ctor! } else { - // invoke, unwind, unreachable. + // invoke, unwind, resume, unreachable. return false; // Cannot handle this terminator. } - + // Okay, we succeeded in evaluating this control flow. See if we have // executed the new block before. If so, we have a looping function, // which we cannot evaluate in reasonable time. if (!ExecutedBlocks.insert(NewBB)) return false; // looped! - + // Okay, we have never been in this block before. Check to see if there // are any PHI nodes. If so, evaluate them with information about where // we came from. @@ -2387,10 +2532,14 @@ // Did not know how to evaluate this! return false; } - - if (!CurInst->use_empty()) + + if (!CurInst->use_empty()) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) + InstResult = ConstantFoldConstantExpression(CE, TD); + Values[CurInst] = InstResult; - + } + // Advance program counter. ++CurInst; } @@ -2398,7 +2547,7 @@ /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F) { +static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { /// MutatedMemory - For each store we execute, we update this map. Loads /// check this to get the most up-to-date value. If evaluation is successful, /// this state is committed to the process. @@ -2408,17 +2557,23 @@ /// to represent its body. This vector is needed so we can delete the /// temporary globals when we are done. 
std::vector<GlobalVariable*> AllocaTmps; - + /// CallStack - This is used to detect recursion. In pathological situations /// we could hit exponential behavior, but at least there is nothing /// unbounded. std::vector<Function*> CallStack; + /// SimpleConstants - These are constants we have checked and know to be + /// simple enough to live in a static initializer of a global. + SmallPtrSet<Constant*, 8> SimpleConstants; + // Call the function. Constant *RetValDummy; bool EvalSuccess = EvaluateFunction(F, RetValDummy, SmallVector<Constant*, 0>(), CallStack, - MutatedMemory, AllocaTmps); + MutatedMemory, AllocaTmps, + SimpleConstants, TD); + if (EvalSuccess) { // We succeeded at evaluation: commit the result. DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" @@ -2428,13 +2583,13 @@ E = MutatedMemory.end(); I != E; ++I) CommitValueTo(I->second, I->first); } - + // At this point, we are done interpreting. If we created any 'alloca' // temporaries, release them now. while (!AllocaTmps.empty()) { GlobalVariable *Tmp = AllocaTmps.back(); AllocaTmps.pop_back(); - + // If there are still users of the alloca, the program is doing something // silly, e.g. storing the address of the alloca somewhere and using it // later. Since this is undefined, we'll just make it be null. @@ -2442,7 +2597,7 @@ Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); delete Tmp; } - + return EvalSuccess; } @@ -2454,7 +2609,8 @@ std::vector<Function*> Ctors = ParseGlobalCtors(GCL); bool MadeChange = false; if (Ctors.empty()) return false; - + + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); // Loop over global ctors, optimizing them when we can. for (unsigned i = 0; i != Ctors.size(); ++i) { Function *F = Ctors[i]; @@ -2467,12 +2623,12 @@ } break; } - + // We cannot simplify external ctor functions. if (F->empty()) continue; - + // If we can evaluate the ctor at compile time, do. 
- if (EvaluateStaticConstructor(F)) { + if (EvaluateStaticConstructor(F, TD)) { Ctors.erase(Ctors.begin()+i); MadeChange = true; --i; @@ -2480,9 +2636,9 @@ continue; } } - + if (!MadeChange) return false; - + GCL = InstallGlobalCtors(GCL, Ctors); return true; } @@ -2544,33 +2700,152 @@ return Changed; } +static Function *FindCXAAtExit(Module &M) { + Function *Fn = M.getFunction("__cxa_atexit"); + + if (!Fn) + return 0; + + FunctionType *FTy = Fn->getFunctionType(); + + // Checking that the function has the right return type, the right number of + // parameters and that they all have pointer types should be enough. + if (!FTy->getReturnType()->isIntegerTy() || + FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return 0; + + return Fn; +} + +/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ +/// destructor and can therefore be eliminated. +/// Note that we assume that other optimization passes have already simplified +/// the code so we only look for a function with a single basic block, where +/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor. +static bool cxxDtorIsEmpty(const Function &Fn, + SmallPtrSet<const Function *, 8> &CalledFunctions) { + // FIXME: We could eliminate C++ destructors if they're readonly/readnone and + // nounwind, but that doesn't seem worth doing. + if (Fn.isDeclaration()) + return false; + + if (++Fn.begin() != Fn.end()) + return false; + + const BasicBlock &EntryBlock = Fn.getEntryBlock(); + for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); + I != E; ++I) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore debug intrinsics. 
+ if (isa<DbgInfoIntrinsic>(CI)) + continue; + + const Function *CalledFn = CI->getCalledFunction(); + + if (!CalledFn) + return false; + + SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions); + + // Don't treat recursive functions as empty. + if (!NewCalledFunctions.insert(CalledFn)) + return false; + + if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) + return false; + } else if (isa<ReturnInst>(*I)) + return true; + else + return false; + } + + return false; +} + +bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { + /// Itanium C++ ABI p3.3.5: + /// + /// After constructing a global (or local static) object, that will require + /// destruction on exit, a termination function is registered as follows: + /// + /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); + /// + /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the + /// call f(p) when DSO d is unloaded, before all such termination calls + /// registered before this one. It returns zero if registration is + /// successful, nonzero on failure. + + // This pass will look for calls to __cxa_atexit where the function is trivial + // and remove them. + bool Changed = false; + + for (Function::use_iterator I = CXAAtExitFn->use_begin(), + E = CXAAtExitFn->use_end(); I != E;) { + // We're only interested in calls. Theoretically, we could handle invoke + // instructions as well, but neither llvm-gcc nor clang generate invokes + // to __cxa_atexit. + CallInst *CI = dyn_cast<CallInst>(*I++); + if (!CI) + continue; + + Function *DtorFn = + dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); + if (!DtorFn) + continue; + + SmallPtrSet<const Function *, 8> CalledFunctions; + if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions)) + continue; + + // Just remove the call. 
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + CI->eraseFromParent(); + + ++NumCXXDtorsRemoved; + + Changed |= true; + } + + return Changed; +} + bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; - + // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); + Function *CXAAtExitFn = FindCXAAtExit(M); + bool LocalChange = true; while (LocalChange) { LocalChange = false; - + // Delete functions that are trivially dead, ccc -> fastcc LocalChange |= OptimizeFunctions(M); - + // Optimize global_ctors list. if (GlobalCtors) LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); - + // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M); // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M); + + // Try to remove trivial global destructors. + if (CXAAtExitFn) + LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); + Changed |= LocalChange; } - + // TODO: Move all global ctors functions to the end of the module for code // layout. - + return Changed; }
diff --git a/src/LLVM/lib/Transforms/IPO/INSTALL.vcxproj b/src/LLVM/lib/Transforms/IPO/INSTALL.vcxproj new file mode 100644 index 0000000..20538d4 --- /dev/null +++ b/src/LLVM/lib/Transforms/IPO/INSTALL.vcxproj
@@ -0,0 +1,261 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>INSTALL</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + 
<OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/IPO/INSTALL.vcxproj.filters b/src/LLVM/lib/Transforms/IPO/INSTALL.vcxproj.filters new file mode 100644 index 0000000..251dd1d --- /dev/null +++ b/src/LLVM/lib/Transforms/IPO/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/IPO/IPConstantPropagation.cpp b/src/LLVM/lib/Transforms/IPO/IPConstantPropagation.cpp index 1b3cf78..d757e1f 100644 --- a/src/LLVM/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/src/LLVM/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -35,7 +35,9 @@ /// struct IPCP : public ModulePass { static char ID; // Pass identification, replacement for typeid - IPCP() : ModulePass(ID) {} + IPCP() : ModulePass(ID) { + initializeIPCPPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); private: @@ -46,7 +48,7 @@ char IPCP::ID = 0; INITIALIZE_PASS(IPCP, "ipconstprop", - "Interprocedural constant propagation", false, false); + "Interprocedural constant propagation", false, false) ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } @@ -165,7 +167,7 @@ // Check to see if this function returns a constant. SmallVector<Value *,4> RetVals; - const StructType *STy = dyn_cast<StructType>(F.getReturnType()); + StructType *STy = dyn_cast<StructType>(F.getReturnType()); if (STy) for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) RetVals.push_back(UndefValue::get(STy->getElementType(i))); @@ -184,7 +186,7 @@ // Find the returned value Value *V; if (!STy) - V = RI->getOperand(i); + V = RI->getOperand(0); else V = FindInsertedValue(RI->getOperand(0), i);
diff --git a/src/LLVM/lib/Transforms/IPO/IPO.cpp b/src/LLVM/lib/Transforms/IPO/IPO.cpp index 340b70e..6233922 100644 --- a/src/LLVM/lib/Transforms/IPO/IPO.cpp +++ b/src/LLVM/lib/Transforms/IPO/IPO.cpp
@@ -7,17 +7,49 @@ // //===----------------------------------------------------------------------===// // -// This file implements the C bindings for libLLVMIPO.a, which implements -// several transformations over the LLVM intermediate representation. +// This file implements the common infrastructure (including C bindings) for +// libLLVMIPO.a, which implements several transformations over the LLVM +// intermediate representation. // //===----------------------------------------------------------------------===// +#include "llvm-c/Initialization.h" #include "llvm-c/Transforms/IPO.h" +#include "llvm/InitializePasses.h" #include "llvm/PassManager.h" #include "llvm/Transforms/IPO.h" using namespace llvm; +void llvm::initializeIPO(PassRegistry &Registry) { + initializeArgPromotionPass(Registry); + initializeConstantMergePass(Registry); + initializeDAEPass(Registry); + initializeDAHPass(Registry); + initializeFunctionAttrsPass(Registry); + initializeGlobalDCEPass(Registry); + initializeGlobalOptPass(Registry); + initializeIPCPPass(Registry); + initializeAlwaysInlinerPass(Registry); + initializeSimpleInlinerPass(Registry); + initializeInternalizePassPass(Registry); + initializeLoopExtractorPass(Registry); + initializeBlockExtractorPassPass(Registry); + initializeSingleLoopExtractorPass(Registry); + initializeMergeFunctionsPass(Registry); + initializePartialInlinerPass(Registry); + initializePruneEHPass(Registry); + initializeStripDeadPrototypesPassPass(Registry); + initializeStripSymbolsPass(Registry); + initializeStripDebugDeclarePass(Registry); + initializeStripDeadDebugInfoPass(Registry); + initializeStripNonDebugSymbolsPass(Registry); +} + +void LLVMInitializeIPO(LLVMPassRegistryRef R) { + initializeIPO(*unwrap(R)); +} + void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createArgumentPromotionPass()); } @@ -30,10 +62,6 @@ unwrap(PM)->add(createDeadArgEliminationPass()); } -void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) { - 
unwrap(PM)->add(createDeadTypeEliminationPass()); -} - void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createFunctionAttrsPass()); } @@ -42,6 +70,10 @@ unwrap(PM)->add(createFunctionInliningPass()); } +void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(llvm::createAlwaysInlinerPass()); +} + void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createGlobalDCEPass()); } @@ -54,10 +86,6 @@ unwrap(PM)->add(createIPConstantPropagationPass()); } -void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLowerSetJmpPass()); -} - void LLVMAddPruneEHPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPruneEHPass()); } @@ -70,11 +98,6 @@ unwrap(PM)->add(createInternalizePass(AllButMain != 0)); } - -void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) { - // FIXME: Remove in LLVM 3.0. -} - void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createStripDeadPrototypesPass()); }
diff --git a/src/LLVM/lib/Transforms/IPO/InlineAlways.cpp b/src/LLVM/lib/Transforms/IPO/InlineAlways.cpp index ecc60ad..c0426da 100644 --- a/src/LLVM/lib/Transforms/IPO/InlineAlways.cpp +++ b/src/LLVM/lib/Transforms/IPO/InlineAlways.cpp
@@ -23,6 +23,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -32,11 +33,13 @@ // AlwaysInliner only inlines functions that are mark as "always inline". class AlwaysInliner : public Inliner { // Functions that are never inlined - SmallPtrSet<const Function*, 16> NeverInline; + SmallPtrSet<const Function*, 16> NeverInline; InlineCostAnalyzer CA; public: - // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000) {} + // Use extremely low threshold. + AlwaysInliner() : Inliner(ID, -2000000000) { + initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); + } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); @@ -50,8 +53,8 @@ void growCachedCostInfo(Function* Caller, Function* Callee) { CA.growCachedCostInfo(Caller, Callee); } - virtual bool doFinalization(CallGraph &CG) { - return removeDeadFunctions(CG, &NeverInline); + virtual bool doFinalization(CallGraph &CG) { + return removeDeadFunctions(CG, &NeverInline); } virtual bool doInitialization(CallGraph &CG); void releaseMemory() { @@ -61,16 +64,21 @@ } char AlwaysInliner::ID = 0; -INITIALIZE_PASS(AlwaysInliner, "always-inline", - "Inliner for always_inline functions", false, false); +INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", + "Inliner for always_inline functions", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(AlwaysInliner, "always-inline", + "Inliner for always_inline functions", false, false) Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); } -// doInitialization - Initializes the vector of functions that have not +// doInitialization - Initializes the vector of functions that have not // been annotated with the "always inline" attribute. 
bool AlwaysInliner::doInitialization(CallGraph &CG) { + CA.setTargetData(getAnalysisIfAvailable<TargetData>()); + Module &M = CG.getModule(); - + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
diff --git a/src/LLVM/lib/Transforms/IPO/InlineSimple.cpp b/src/LLVM/lib/Transforms/IPO/InlineSimple.cpp index 9c6637d..84dd4fd 100644 --- a/src/LLVM/lib/Transforms/IPO/InlineSimple.cpp +++ b/src/LLVM/lib/Transforms/IPO/InlineSimple.cpp
@@ -22,6 +22,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -30,11 +31,15 @@ class SimpleInliner : public Inliner { // Functions that are never inlined - SmallPtrSet<const Function*, 16> NeverInline; + SmallPtrSet<const Function*, 16> NeverInline; InlineCostAnalyzer CA; public: - SimpleInliner() : Inliner(ID) {} - SimpleInliner(int Threshold) : Inliner(ID, Threshold) {} + SimpleInliner() : Inliner(ID) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } + SimpleInliner(int Threshold) : Inliner(ID, Threshold) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); @@ -56,21 +61,25 @@ } char SimpleInliner::ID = 0; -INITIALIZE_PASS(SimpleInliner, "inline", - "Function Integration/Inlining", false, false); +INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } -Pass *llvm::createFunctionInliningPass(int Threshold) { +Pass *llvm::createFunctionInliningPass(int Threshold) { return new SimpleInliner(Threshold); } // doInitialization - Initializes the vector of functions that have been // annotated with the noinline attribute. 
bool SimpleInliner::doInitialization(CallGraph &CG) { - + CA.setTargetData(getAnalysisIfAvailable<TargetData>()); + Module &M = CG.getModule(); - + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline)) @@ -78,34 +87,34 @@ // Get llvm.noinline GlobalVariable *GV = M.getNamedGlobal("llvm.noinline"); - + if (GV == 0) return false; // Don't crash on invalid code if (!GV->hasDefinitiveInitializer()) return false; - + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - + if (InitList == 0) return false; // Iterate over each element and add to the NeverInline set for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - + // Get Source const Constant *Elt = InitList->getOperand(i); - + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt)) - if (CE->getOpcode() == Instruction::BitCast) + if (CE->getOpcode() == Instruction::BitCast) Elt = CE->getOperand(0); - + // Insert into set of functions to never inline if (const Function *F = dyn_cast<Function>(Elt)) NeverInline.insert(F); } - + return false; }
diff --git a/src/LLVM/lib/Transforms/IPO/Inliner.cpp b/src/LLVM/lib/Transforms/IPO/Inliner.cpp index 4983e8e..f00935b0 100644 --- a/src/LLVM/lib/Transforms/IPO/Inliner.cpp +++ b/src/LLVM/lib/Transforms/IPO/Inliner.cpp
@@ -29,7 +29,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include <set> using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); @@ -52,7 +51,8 @@ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {} Inliner::Inliner(char &ID, int Threshold) - : CallGraphSCCPass(ID), InlineThreshold(Threshold) {} + : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? + InlineLimit : Threshold) {} /// getAnalysisUsage - For this class, we declare that we require and preserve /// the call graph. If the derived class implements this method, it should @@ -62,7 +62,7 @@ } -typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> > +typedef DenseMap<ArrayType*, std::vector<AllocaInst*> > InlinedArrayAllocasTy; /// InlineCallIfPossible - If it is possible to inline the specified call site, @@ -74,7 +74,8 @@ /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas) { + InlinedArrayAllocasTy &InlinedArrayAllocas, + int InlineHistory) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -91,7 +92,6 @@ !Caller->hasFnAttr(Attribute::StackProtectReq)) Caller->addFnAttr(Attribute::StackProtect); - // Look at all of the allocas that we inlined through this call site. If we // have already inlined other allocas through other calls into this function, // then we know that they have disjoint lifetimes and that we can merge them. @@ -115,6 +115,21 @@ // SmallPtrSet<AllocaInst*, 16> UsedAllocas; + // When processing our SCC, check to see if CS was inlined from some other + // call site. For example, if we're processing "A" in this code: + // A() { B() } + // B() { x = alloca ... C() } + // C() { y = alloca ... 
} + // Assume that C was not inlined into B initially, and so we're processing A + // and decide to inline B into A. Doing this makes an alloca available for + // reuse and makes a callsite (C) available for inlining. When we process + // the C call site we don't want to do any alloca merging between X and Y + // because their scopes are not disjoint. We could make this smarter by + // keeping track of the inline history for each alloca in the + // InlinedArrayAllocas but this isn't likely to be a significant win. + if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. + return true; + // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); @@ -124,7 +139,7 @@ // Don't bother trying to merge array allocations (they will usually be // canonicalized to be an allocation *of* an array), or allocations whose // type is not itself an array (because we're afraid of pessimizing SRoA). - const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); + ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); if (ATy == 0 || AI->isArrayAllocation()) continue; @@ -152,19 +167,21 @@ // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare // success! - DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI); + DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: " + << *AvailableAlloca << '\n'); AI->replaceAllUsesWith(AvailableAlloca); AI->eraseFromParent(); MergedAwayAlloca = true; ++NumMergedAllocas; + IFI.StaticAllocas[AllocaNo] = 0; break; } // If we already nuked the alloca, we're done with it. 
if (MergedAwayAlloca) continue; - + // If we were unable to merge away the alloca either because there are no // allocas of the right type available or because we reused them all // already, remember that this alloca came from an inlined function and mark @@ -234,20 +251,25 @@ if (Caller->hasLocalLinkage()) { int TotalSecondaryCost = 0; bool outerCallsFound = false; - bool allOuterCallsWillBeInlined = true; - bool someOuterCallWouldNotBeInlined = false; + // This bool tracks what happens if we do NOT inline C into B. + bool callerWillBeRemoved = true; + // This bool tracks what happens if we DO inline C into B. + bool inliningPreventsSomeOuterInline = false; for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); I != E; ++I) { CallSite CS2(*I); // If this isn't a call to Caller (it could be some other sort - // of reference) skip it. - if (!CS2 || CS2.getCalledFunction() != Caller) + // of reference) skip it. Such references will prevent the caller + // from being removed. + if (!CS2 || CS2.getCalledFunction() != Caller) { + callerWillBeRemoved = false; continue; + } InlineCost IC2 = getInlineCost(CS2); if (IC2.isNever()) - allOuterCallsWillBeInlined = false; + callerWillBeRemoved = false; if (IC2.isAlways() || IC2.isNever()) continue; @@ -257,14 +279,14 @@ float FudgeFactor2 = getInlineFudgeFactor(CS2); if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) - allOuterCallsWillBeInlined = false; + callerWillBeRemoved = false; // See if we have this case. We subtract off the penalty // for the call instruction, which we would be deleting. if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= (int)(CurrentThreshold2 * FudgeFactor2)) { - someOuterCallWouldNotBeInlined = true; + inliningPreventsSomeOuterInline = true; TotalSecondaryCost += Cost2; } } @@ -272,10 +294,10 @@ // one is set very low by getInlineCost, in anticipation that Caller will // be removed entirely. 
We did not account for this above unless there // is only one caller of Caller. - if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end()) + if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end()) TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; - if (outerCallsFound && someOuterCallWouldNotBeInlined && + if (outerCallsFound && inliningPreventsSomeOuterInline && TotalSecondaryCost < Cost) { DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << Cost << @@ -401,7 +423,7 @@ // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee - // itself. If so, we'd be recursively inlinling the same function, + // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. int InlineHistoryID = CallSites[CSi].second; @@ -416,7 +438,8 @@ continue; // Attempt to inline the function. - if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas)) + if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, + InlineHistoryID)) continue; ++NumInlined;
diff --git a/src/LLVM/lib/Transforms/IPO/Internalize.cpp b/src/LLVM/lib/Transforms/IPO/Internalize.cpp index a1d919f..7cb1d18 100644 --- a/src/LLVM/lib/Transforms/IPO/Internalize.cpp +++ b/src/LLVM/lib/Transforms/IPO/Internalize.cpp
@@ -64,10 +64,11 @@ char InternalizePass::ID = 0; INITIALIZE_PASS(InternalizePass, "internalize", - "Internalize Global Symbols", false, false); + "Internalize Global Symbols", false, false) InternalizePass::InternalizePass(bool AllButMain) : ModulePass(ID), AllButMain(AllButMain){ + initializeInternalizePassPass(*PassRegistry::getPassRegistry()); if (!APIFile.empty()) // If a filename is specified, use it. LoadFile(APIFile.c_str()); if (!APIList.empty()) // If a list is specified, use it as well. @@ -76,6 +77,7 @@ InternalizePass::InternalizePass(const std::vector<const char *>&exportList) : ModulePass(ID), AllButMain(false){ + initializeInternalizePassPass(*PassRegistry::getPassRegistry()); for(std::vector<const char *>::const_iterator itr = exportList.begin(); itr != exportList.end(); itr++) { ExternalNames.insert(*itr); @@ -124,6 +126,8 @@ // FIXME: maybe use private linkage? for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && // Function must be defined here + // Available externally is really just a "declaration with a body". + !I->hasAvailableExternallyLinkage() && !I->hasLocalLinkage() && // Can't already have internal linkage !ExternalNames.count(I->getName())) {// Not marked to keep external? I->setLinkage(GlobalValue::InternalLinkage); @@ -142,9 +146,6 @@ // Never internalize anchors used by the machine module info, else the info // won't find them. (see MachineModuleInfo.) - ExternalNames.insert("llvm.dbg.compile_units"); - ExternalNames.insert("llvm.dbg.global_variables"); - ExternalNames.insert("llvm.dbg.subprograms"); ExternalNames.insert("llvm.global_ctors"); ExternalNames.insert("llvm.global_dtors"); ExternalNames.insert("llvm.noinline");
diff --git a/src/LLVM/lib/Transforms/IPO/LLVMipo.vcxproj b/src/LLVM/lib/Transforms/IPO/LLVMipo.vcxproj new file mode 100644 index 0000000..8c0eada --- /dev/null +++ b/src/LLVM/lib/Transforms/IPO/LLVMipo.vcxproj
@@ -0,0 +1,387 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Debug|x64"> + <Configuration>Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|Win32"> + <Configuration>Profile</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|x64"> + <Configuration>Profile</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|x64"> + <Configuration>Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{86CA752C-B6D8-4E70-B93B-5882F1766AD1}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>LLVMipo</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + 
<ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir> + 
<TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.lib</TargetExt> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</IntDir> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">.lib</TargetExt> + 
</PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Debug/LLVMipo.pdb</ProgramDataBaseFileName> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + 
<AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + 
<ProgramDataBaseFileName>..\..\Debug/LLVMipo.pdb</ProgramDataBaseFileName> + <OmitFramePointers>false</OmitFramePointers> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMipo.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMipo.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMipo.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Profile|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMipo.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemGroup> + <ClCompile Include="ArgumentPromotion.cpp" /> + <ClCompile Include="ConstantMerge.cpp" /> + <ClCompile Include="DeadArgumentElimination.cpp" /> + <ClCompile Include="ExtractGV.cpp" /> + <ClCompile Include="FunctionAttrs.cpp" /> + <ClCompile Include="GlobalDCE.cpp" /> + <ClCompile Include="GlobalOpt.cpp" /> + <ClCompile Include="IPConstantPropagation.cpp" /> + <ClCompile Include="IPO.cpp" /> + <ClCompile Include="InlineAlways.cpp" /> + <ClCompile Include="InlineSimple.cpp" /> + <ClCompile Include="Inliner.cpp" /> + <ClCompile Include="Internalize.cpp" /> + <ClCompile Include="LoopExtractor.cpp" /> + <ClCompile Include="MergeFunctions.cpp" /> + <ClCompile Include="PartialInlining.cpp" /> + <ClCompile Include="PassManagerBuilder.cpp" /> + <ClCompile Include="PruneEH.cpp" /> + <ClCompile Include="StripDeadPrototypes.cpp" /> + <ClCompile Include="StripSymbols.cpp" /> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\Analysis/LLVMAnalysis.vcxproj"> + <Project>97EDF19C-6360-4770-9255-EBA2F1A13E9B</Project> + 
</ProjectReference> + <ProjectReference Include="..\..\VMCore/LLVMCore.vcxproj"> + <Project>00F3295C-F7A0-43D3-BD0B-1BC0515B30E1</Project> + </ProjectReference> + <ProjectReference Include="..\Scalar/LLVMScalarOpts.vcxproj"> + <Project>A3C67D8F-E19A-46EF-91AB-C7840FE2B97C</Project> + </ProjectReference> + <ProjectReference Include="..\..\Support/LLVMSupport.vcxproj"> + <Project>C688DD59-C6CB-4B33-B56F-A7D6F3761524</Project> + </ProjectReference> + <ProjectReference Include="..\..\Target/LLVMTarget.vcxproj"> + <Project>76F7B8C2-C825-40DC-BB68-9D987275E320</Project> + </ProjectReference> + <ProjectReference Include="..\Utils/LLVMTransformUtils.vcxproj"> + <Project>5C514254-58EE-4850-8743-F5D7BEAA3E66</Project> + </ProjectReference> + <ProjectReference Include="..\..\Analysis/IPA/LLVMipa.vcxproj"> + <Project>32FFFFE6-D5B8-43F6-8E23-362A0D0252A1</Project> + </ProjectReference> + <ProjectReference Include="..\..\..\include/llvm/intrinsics_gen.vcxproj"> + <Project>E9B87B46-1EB0-4D95-9049-41B148FBADCD</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/IPO/LLVMipo.vcxproj.filters b/src/LLVM/lib/Transforms/IPO/LLVMipo.vcxproj.filters new file mode 100644 index 0000000..6e370f4 --- /dev/null +++ b/src/LLVM/lib/Transforms/IPO/LLVMipo.vcxproj.filters
@@ -0,0 +1,78 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <ClCompile Include="ArgumentPromotion.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ConstantMerge.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="DeadArgumentElimination.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ExtractGV.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="FunctionAttrs.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="GlobalDCE.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="GlobalOpt.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="IPConstantPropagation.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="IPO.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InlineAlways.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InlineSimple.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="Inliner.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="Internalize.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="LoopExtractor.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="MergeFunctions.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="PartialInlining.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="PassManagerBuilder.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="PruneEH.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="StripDeadPrototypes.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="StripSymbols.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + 
</ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{1733179C-6FE4-462E-9EA5-4A29A1ACFE25}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/IPO/LoopExtractor.cpp b/src/LLVM/lib/Transforms/IPO/LoopExtractor.cpp index f88dff6..4f96afe 100644 --- a/src/LLVM/lib/Transforms/IPO/LoopExtractor.cpp +++ b/src/LLVM/lib/Transforms/IPO/LoopExtractor.cpp
@@ -23,6 +23,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/FunctionUtils.h" #include "llvm/ADT/Statistic.h" #include <fstream> @@ -37,7 +38,9 @@ unsigned NumLoops; explicit LoopExtractor(unsigned numLoops = ~0) - : LoopPass(ID), NumLoops(numLoops) {} + : LoopPass(ID), NumLoops(numLoops) { + initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -50,8 +53,13 @@ } char LoopExtractor::ID = 0; -INITIALIZE_PASS(LoopExtractor, "loop-extract", - "Extract loops into new functions", false, false); +INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) +INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) namespace { /// SingleLoopExtractor - For bugpoint. @@ -63,7 +71,7 @@ char SingleLoopExtractor::ID = 0; INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", - "Extract at most one loop into a new function", false, false); + "Extract at most one loop into a new function", false, false) // createLoopExtractorPass - This pass extracts all natural loops from the // program into a function if it can. @@ -93,9 +101,9 @@ L->getHeader()->getParent()->getEntryBlock().getTerminator(); if (!isa<BranchInst>(EntryTI) || !cast<BranchInst>(EntryTI)->isUnconditional() || - EntryTI->getSuccessor(0) != L->getHeader()) + EntryTI->getSuccessor(0) != L->getHeader()) { ShouldExtractLoop = true; - else { + } else { // Check to see if any exits from the loop are more than just return // blocks. SmallVector<BasicBlock*, 8> ExitBlocks; @@ -106,6 +114,21 @@ break; } } + + if (ShouldExtractLoop) { + // We must omit landing pads. 
Landing pads must accompany the invoke + // instruction. But this would result in a loop in the extracted + // function. An infinite cycle occurs when it tries to extract that loop as + // well. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i]->isLandingPad()) { + ShouldExtractLoop = false; + break; + } + } + if (ShouldExtractLoop) { if (NumLoops == 0) return Changed; --NumLoops; @@ -142,6 +165,7 @@ /// BlocksToNotExtract list. class BlockExtractorPass : public ModulePass { void LoadFile(const char *Filename); + void SplitLandingPadPreds(Function *F); std::vector<BasicBlock*> BlocksToNotExtract; std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName; @@ -159,13 +183,12 @@ char BlockExtractorPass::ID = 0; INITIALIZE_PASS(BlockExtractorPass, "extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)", - false, false); + false, false) // createBlockExtractorPass - This pass extracts all blocks (except those // specified in the argument list) from the functions in the module. // -ModulePass *llvm::createBlockExtractorPass() -{ +ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractorPass(); } @@ -187,6 +210,37 @@ } } +/// SplitLandingPadPreds - The landing pad needs to be extracted with the invoke +/// instruction. The critical edge breaker will refuse to break critical edges +/// to a landing pad. So do them here. After this method runs, all landing pads +/// should have only one predecessor. +void BlockExtractorPass::SplitLandingPadPreds(Function *F) { + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + InvokeInst *II = dyn_cast<InvokeInst>(I); + if (!II) continue; + BasicBlock *Parent = II->getParent(); + BasicBlock *LPad = II->getUnwindDest(); + + // Look through the landing pad's predecessors. If one of them ends in an + // 'invoke', then we want to split the landing pad. 
+ bool Split = false; + for (pred_iterator + PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) { + BasicBlock *BB = *PI; + if (BB->isLandingPad() && BB != Parent && + isa<InvokeInst>(Parent->getTerminator())) { + Split = true; + break; + } + } + + if (!Split) continue; + + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs); + } +} + bool BlockExtractorPass::runOnModule(Module &M) { std::set<BasicBlock*> TranslatedBlocksToNotExtract; for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) { @@ -229,13 +283,21 @@ // Now that we know which blocks to not extract, figure out which ones we WANT // to extract. std::vector<BasicBlock*> BlocksToExtract; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + SplitLandingPadPreds(&*F); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (!TranslatedBlocksToNotExtract.count(BB)) BlocksToExtract.push_back(BB); + } - for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) - ExtractBasicBlock(BlocksToExtract[i]); + for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) { + SmallVector<BasicBlock*, 2> BlocksToExtractVec; + BlocksToExtractVec.push_back(BlocksToExtract[i]); + if (const InvokeInst *II = + dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator())) + BlocksToExtractVec.push_back(II->getUnwindDest()); + ExtractBasicBlock(BlocksToExtractVec); + } return !BlocksToExtract.empty(); }
diff --git a/src/LLVM/lib/Transforms/IPO/MergeFunctions.cpp b/src/LLVM/lib/Transforms/IPO/MergeFunctions.cpp index d7075b9..0b01c38 100644 --- a/src/LLVM/lib/Transforms/IPO/MergeFunctions.cpp +++ b/src/LLVM/lib/Transforms/IPO/MergeFunctions.cpp
@@ -45,119 +45,38 @@ #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" -#include <map> #include <vector> using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); +STATISTIC(NumThunksWritten, "Number of thunks generated"); +STATISTIC(NumAliasesWritten, "Number of aliases generated"); +STATISTIC(NumDoubleWeak, "Number of new functions created"); -namespace { - /// MergeFunctions finds functions which will generate identical machine code, - /// by considering all pointer types to be equivalent. Once identified, - /// MergeFunctions will fold them by replacing a call to one to a call to a - /// bitcast of the other. - /// - class MergeFunctions : public ModulePass { - public: - static char ID; - MergeFunctions() : ModulePass(ID) {} - - bool runOnModule(Module &M); - - private: - /// PairwiseCompareAndMerge - Given a list of functions, compare each pair - /// and merge the pairs of equivalent functions. - bool PairwiseCompareAndMerge(std::vector<Function *> &FnVec); - - /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, - /// FnVec[j] should never be visited again. - void MergeTwoFunctions(std::vector<Function *> &FnVec, - unsigned i, unsigned j) const; - - /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also - /// replace direct uses of G with bitcast(F). 
- void WriteThunk(Function *F, Function *G) const; - - TargetData *TD; - }; -} - -char MergeFunctions::ID = 0; -INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false); - -ModulePass *llvm::createMergeFunctionsPass() { - return new MergeFunctions(); -} - -namespace { -/// FunctionComparator - Compares two functions to determine whether or not -/// they will generate machine code with the same behaviour. TargetData is -/// used if available. The comparator always fails conservatively (erring on the -/// side of claiming that two functions are different). -class FunctionComparator { -public: - FunctionComparator(TargetData *TD, Function *F1, Function *F2) - : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {} - - /// Compare - test whether the two functions have equivalent behaviour. - bool Compare(); - -private: - /// Compare - test whether two basic blocks have equivalent behaviour. - bool Compare(const BasicBlock *BB1, const BasicBlock *BB2); - - /// Enumerate - Assign or look up previously assigned numbers for the two - /// values, and return whether the numbers are equal. Numbers are assigned in - /// the order visited. - bool Enumerate(const Value *V1, const Value *V2); - - /// isEquivalentOperation - Compare two Instructions for equivalence, similar - /// to Instruction::isSameOperationAs but with modifications to the type - /// comparison. - bool isEquivalentOperation(const Instruction *I1, - const Instruction *I2) const; - - /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic. - bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2); - bool isEquivalentGEP(const GetElementPtrInst *GEP1, - const GetElementPtrInst *GEP2) { - return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2)); - } - - /// isEquivalentType - Compare two Types, treating all pointer types as equal. - bool isEquivalentType(const Type *Ty1, const Type *Ty2) const; - - // The two functions undergoing comparison. 
- Function *F1, *F2; - - TargetData *TD; - - typedef DenseMap<const Value *, unsigned long> IDMap; - IDMap Map1, Map2; - unsigned long IDMap1Count, IDMap2Count; -}; -} - -/// Compute a hash guaranteed to be equal for two equivalent functions, but -/// very likely to be different for different functions. -static unsigned long ProfileFunction(const Function *F) { - const FunctionType *FTy = F->getFunctionType(); +/// Creates a hash-code for the function which is the same for any two +/// functions that will compare equal, without looking at the instructions +/// inside the function. +static unsigned profileFunction(const Function *F) { + FunctionType *FTy = F->getFunctionType(); FoldingSetNodeID ID; ID.AddInteger(F->size()); @@ -170,21 +89,136 @@ return ID.ComputeHash(); } -/// isEquivalentType - any two pointers in the same address space are -/// equivalent. Otherwise, standard type equivalence rules apply. -bool FunctionComparator::isEquivalentType(const Type *Ty1, - const Type *Ty2) const { +namespace { + +/// ComparableFunction - A struct that pairs together functions with a +/// TargetData so that we can keep them together as elements in the DenseSet. +class ComparableFunction { +public: + static const ComparableFunction EmptyKey; + static const ComparableFunction TombstoneKey; + static TargetData * const LookupOnly; + + ComparableFunction(Function *Func, TargetData *TD) + : Func(Func), Hash(profileFunction(Func)), TD(TD) {} + + Function *getFunc() const { return Func; } + unsigned getHash() const { return Hash; } + TargetData *getTD() const { return TD; } + + // Drops AssertingVH reference to the function. Outside of debug mode, this + // does nothing. 
+ void release() { + assert(Func && + "Attempted to release function twice, or release empty/tombstone!"); + Func = NULL; + } + +private: + explicit ComparableFunction(unsigned Hash) + : Func(NULL), Hash(Hash), TD(NULL) {} + + AssertingVH<Function> Func; + unsigned Hash; + TargetData *TD; +}; + +const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); +const ComparableFunction ComparableFunction::TombstoneKey = + ComparableFunction(1); +TargetData *const ComparableFunction::LookupOnly = (TargetData*)(-1); + +} + +namespace llvm { + template <> + struct DenseMapInfo<ComparableFunction> { + static ComparableFunction getEmptyKey() { + return ComparableFunction::EmptyKey; + } + static ComparableFunction getTombstoneKey() { + return ComparableFunction::TombstoneKey; + } + static unsigned getHashValue(const ComparableFunction &CF) { + return CF.getHash(); + } + static bool isEqual(const ComparableFunction &LHS, + const ComparableFunction &RHS); + }; +} + +namespace { + +/// FunctionComparator - Compares two functions to determine whether or not +/// they will generate machine code with the same behaviour. TargetData is +/// used if available. The comparator always fails conservatively (erring on the +/// side of claiming that two functions are different). +class FunctionComparator { +public: + FunctionComparator(const TargetData *TD, const Function *F1, + const Function *F2) + : F1(F1), F2(F2), TD(TD) {} + + /// Test whether the two functions have equivalent behaviour. + bool compare(); + +private: + /// Test whether two basic blocks have equivalent behaviour. + bool compare(const BasicBlock *BB1, const BasicBlock *BB2); + + /// Assign or look up previously assigned numbers for the two values, and + /// return whether the numbers are equal. Numbers are assigned in the order + /// visited. 
+ bool enumerate(const Value *V1, const Value *V2); + + /// Compare two Instructions for equivalence, similar to + /// Instruction::isSameOperationAs but with modifications to the type + /// comparison. + bool isEquivalentOperation(const Instruction *I1, + const Instruction *I2) const; + + /// Compare two GEPs for equivalent pointer arithmetic. + bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2); + bool isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2) { + return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2)); + } + + /// Compare two Types, treating all pointer types as equal. + bool isEquivalentType(Type *Ty1, Type *Ty2) const; + + // The two functions undergoing comparison. + const Function *F1, *F2; + + const TargetData *TD; + + DenseMap<const Value *, const Value *> id_map; + DenseSet<const Value *> seen_values; +}; + +} + +// Any two pointers in the same address space are equivalent, intptr_t and +// pointers are equivalent. Otherwise, standard type equivalence rules apply. +bool FunctionComparator::isEquivalentType(Type *Ty1, + Type *Ty2) const { if (Ty1 == Ty2) return true; - if (Ty1->getTypeID() != Ty2->getTypeID()) + if (Ty1->getTypeID() != Ty2->getTypeID()) { + if (TD) { + LLVMContext &Ctx = Ty1->getContext(); + if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true; + if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true; + } return false; + } - switch(Ty1->getTypeID()) { + switch (Ty1->getTypeID()) { default: llvm_unreachable("Unknown type!"); // Fall through in Release mode. case Type::IntegerTyID: - case Type::OpaqueTyID: + case Type::VectorTyID: // Ty1 == Ty2 would have returned true earlier. 
return false; @@ -199,14 +233,14 @@ return true; case Type::PointerTyID: { - const PointerType *PTy1 = cast<PointerType>(Ty1); - const PointerType *PTy2 = cast<PointerType>(Ty2); + PointerType *PTy1 = cast<PointerType>(Ty1); + PointerType *PTy2 = cast<PointerType>(Ty2); return PTy1->getAddressSpace() == PTy2->getAddressSpace(); } case Type::StructTyID: { - const StructType *STy1 = cast<StructType>(Ty1); - const StructType *STy2 = cast<StructType>(Ty2); + StructType *STy1 = cast<StructType>(Ty1); + StructType *STy2 = cast<StructType>(Ty2); if (STy1->getNumElements() != STy2->getNumElements()) return false; @@ -220,23 +254,9 @@ return true; } - case Type::UnionTyID: { - const UnionType *UTy1 = cast<UnionType>(Ty1); - const UnionType *UTy2 = cast<UnionType>(Ty2); - - if (UTy1->getNumElements() != UTy2->getNumElements()) - return false; - - for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) { - if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i))) - return false; - } - return true; - } - case Type::FunctionTyID: { - const FunctionType *FTy1 = cast<FunctionType>(Ty1); - const FunctionType *FTy2 = cast<FunctionType>(Ty2); + FunctionType *FTy1 = cast<FunctionType>(Ty1); + FunctionType *FTy2 = cast<FunctionType>(Ty2); if (FTy1->getNumParams() != FTy2->getNumParams() || FTy1->isVarArg() != FTy2->isVarArg()) return false; @@ -252,26 +272,23 @@ } case Type::ArrayTyID: { - const ArrayType *ATy1 = cast<ArrayType>(Ty1); - const ArrayType *ATy2 = cast<ArrayType>(Ty2); + ArrayType *ATy1 = cast<ArrayType>(Ty1); + ArrayType *ATy2 = cast<ArrayType>(Ty2); return ATy1->getNumElements() == ATy2->getNumElements() && isEquivalentType(ATy1->getElementType(), ATy2->getElementType()); } - - case Type::VectorTyID: { - const VectorType *VTy1 = cast<VectorType>(Ty1); - const VectorType *VTy2 = cast<VectorType>(Ty2); - return VTy1->getNumElements() == VTy2->getNumElements() && - isEquivalentType(VTy1->getElementType(), VTy2->getElementType()); - } } } -/// 
isEquivalentOperation - determine whether the two operations are the same -/// except that pointer-to-A and pointer-to-B are equivalent. This should be -/// kept in sync with Instruction::isSameOperationAs. +// Determine whether the two operations are the same except that pointer-to-A +// and pointer-to-B are equivalent. This should be kept in sync with +// Instruction::isSameOperationAs. bool FunctionComparator::isEquivalentOperation(const Instruction *I1, const Instruction *I2) const { + // Differences from Instruction::isSameOperationAs: + // * replace type comparison with calls to isEquivalentType. + // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top + // * because of the above, we don't test for the tail bit on calls later on if (I1->getOpcode() != I2->getOpcode() || I1->getNumOperands() != I2->getNumOperands() || !isEquivalentType(I1->getType(), I2->getType()) || @@ -288,43 +305,43 @@ // Check special state that is a part of some instructions. if (const LoadInst *LI = dyn_cast<LoadInst>(I1)) return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() && - LI->getAlignment() == cast<LoadInst>(I2)->getAlignment(); + LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() && + LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() && + LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope(); if (const StoreInst *SI = dyn_cast<StoreInst>(I1)) return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() && - SI->getAlignment() == cast<StoreInst>(I2)->getAlignment(); + SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() && + SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() && + SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope(); if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); if (const CallInst *CI = dyn_cast<CallInst>(I1)) - return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() && - CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && - 
CI->getAttributes().getRawPointer() == - cast<CallInst>(I2)->getAttributes().getRawPointer(); + return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && + CI->getAttributes() == cast<CallInst>(I2)->getAttributes(); if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && - CI->getAttributes().getRawPointer() == - cast<InvokeInst>(I2)->getAttributes().getRawPointer(); - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) { - if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices()) - return false; - for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i) - if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i]) - return false; - return true; - } - if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) { - if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices()) - return false; - for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i) - if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i]) - return false; - return true; - } + CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes(); + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) + return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices(); + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) + return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices(); + if (const FenceInst *FI = dyn_cast<FenceInst>(I1)) + return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() && + FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope(); + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1)) + return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() && + CXI->getOrdering() == cast<AtomicCmpXchgInst>(I2)->getOrdering() && + CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope(); + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1)) + return 
RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() && + RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() && + RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() && + RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope(); return true; } -/// isEquivalentGEP - determine whether two GEP operations perform the same -/// underlying arithmetic. +// Determine whether two GEP operations perform the same underlying arithmetic. bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) { // When we have target data, we can reduce the GEP down to the value in bytes @@ -333,9 +350,9 @@ SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end()); SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end()); uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(), - Indices1.data(), Indices1.size()); + Indices1); uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(), - Indices2.data(), Indices2.size()); + Indices2); return Offset1 == Offset2; } @@ -347,17 +364,17 @@ return false; for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { - if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) + if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) return false; } return true; } -/// Enumerate - Compare two values used by the two functions under pair-wise -/// comparison. If this is the first time the values are seen, they're added to -/// the mapping so that we will detect mismatches on next use. -bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) { +// Compare two values used by the two functions under pair-wise comparison. If +// this is the first time the values are seen, they're added to the mapping so +// that we will detect mismatches on next use. 
+bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { // Check for function @f1 referring to itself and function @f2 referring to // itself, or referring to each other, or both referring to either of them. // They're all equivalent if the two functions are otherwise equivalent. @@ -366,35 +383,44 @@ if (V1 == F2 && V2 == F1) return true; - // TODO: constant expressions with GEP or references to F1 or F2. - if (isa<Constant>(V1)) - return V1 == V2; - - if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) { - const InlineAsm *IA1 = cast<InlineAsm>(V1); - const InlineAsm *IA2 = cast<InlineAsm>(V2); - return IA1->getAsmString() == IA2->getAsmString() && - IA1->getConstraintString() == IA2->getConstraintString(); + if (const Constant *C1 = dyn_cast<Constant>(V1)) { + if (V1 == V2) return true; + const Constant *C2 = dyn_cast<Constant>(V2); + if (!C2) return false; + // TODO: constant expressions with GEP or references to F1 or F2. + if (C1->isNullValue() && C2->isNullValue() && + isEquivalentType(C1->getType(), C2->getType())) + return true; + // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 + // then they must have equal bit patterns. + return C1->getType()->canLosslesslyBitCastTo(C2->getType()) && + C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType()); } - unsigned long &ID1 = Map1[V1]; - if (!ID1) - ID1 = ++IDMap1Count; + if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2)) + return V1 == V2; - unsigned long &ID2 = Map2[V2]; - if (!ID2) - ID2 = ++IDMap2Count; + // Check that V1 maps to V2. If we find a value that V1 maps to then we simply + // check whether it's equal to V2. When there is no mapping then we need to + // ensure that V2 isn't already equivalent to something else. For this + // purpose, we track the V2 values in a set. 
- return ID1 == ID2; + const Value *&map_elem = id_map[V1]; + if (map_elem) + return map_elem == V2; + if (!seen_values.insert(V2).second) + return false; + map_elem = V2; + return true; } -/// Compare - test whether two basic blocks have equivalent behaviour. -bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { +// Test whether two basic blocks have equivalent behaviour. +bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) { BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end(); BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end(); do { - if (!Enumerate(F1I, F2I)) + if (!enumerate(F1I, F2I)) return false; if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) { @@ -402,7 +428,7 @@ if (!GEP2) return false; - if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) + if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) return false; if (!isEquivalentGEP(GEP1, GEP2)) @@ -416,7 +442,7 @@ Value *OpF1 = F1I->getOperand(i); Value *OpF2 = F2I->getOperand(i); - if (!Enumerate(OpF1, OpF2)) + if (!enumerate(OpF1, OpF2)) return false; if (OpF1->getValueID() != OpF2->getValueID() || @@ -431,8 +457,8 @@ return F1I == F1E && F2I == F2E; } -/// Compare - test whether the two functions have equivalent behaviour. -bool FunctionComparator::Compare() { +// Test whether the two functions have equivalent behaviour. +bool FunctionComparator::compare() { // We need to recheck everything, but check the things that weren't included // in the hash first. @@ -463,14 +489,14 @@ return false; assert(F1->arg_size() == F2->arg_size() && - "Identical functions have a different number of args."); + "Identically typed functions have different numbers of args!"); // Visit the arguments so that they get enumerated in the order they're // passed in. 
for (Function::const_arg_iterator f1i = F1->arg_begin(), f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) { - if (!Enumerate(f1i, f2i)) - llvm_unreachable("Arguments repeat"); + if (!enumerate(f1i, f2i)) + llvm_unreachable("Arguments repeat!"); } // We do a CFG-ordered walk since the actual ordering of the blocks in the @@ -488,7 +514,7 @@ const BasicBlock *F1BB = F1BBs.pop_back_val(); const BasicBlock *F2BB = F2BBs.pop_back_val(); - if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB)) + if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB)) return false; const TerminatorInst *F1TI = F1BB->getTerminator(); @@ -506,23 +532,190 @@ return true; } -/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace -/// direct uses of G with bitcast(F). -void MergeFunctions::WriteThunk(Function *F, Function *G) const { - if (!G->mayBeOverridden()) { - // Redirect direct callers of G to F. - Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); - for (Value::use_iterator UI = G->use_begin(), UE = G->use_end(); - UI != UE;) { - Value::use_iterator TheIter = UI; - ++UI; - CallSite CS(*TheIter); - if (CS && CS.isCallee(TheIter)) - TheIter.getUse().set(BitcastF); +namespace { + +/// MergeFunctions finds functions which will generate identical machine code, +/// by considering all pointer types to be equivalent. Once identified, +/// MergeFunctions will fold them by replacing a call to one to a call to a +/// bitcast of the other. +/// +class MergeFunctions : public ModulePass { +public: + static char ID; + MergeFunctions() + : ModulePass(ID), HasGlobalAliases(false) { + initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M); + +private: + typedef DenseSet<ComparableFunction> FnSetType; + + /// A work queue of functions that may have been modified and should be + /// analyzed again. 
+ std::vector<WeakVH> Deferred; + + /// Insert a ComparableFunction into the FnSet, or merge it away if it's + /// equal to one that's already present. + bool insert(ComparableFunction &NewF); + + /// Remove a Function from the FnSet and queue it up for a second sweep of + /// analysis. + void remove(Function *F); + + /// Find the functions that use this Value and remove them from FnSet and + /// queue the functions. + void removeUsers(Value *V); + + /// Replace all direct calls of Old with calls of New. Will bitcast New if + /// necessary to make types match. + void replaceDirectCallers(Function *Old, Function *New); + + /// Merge two equivalent functions. Upon completion, G may be deleted, or may + /// be converted into a thunk. In either case, it should never be visited + /// again. + void mergeTwoFunctions(Function *F, Function *G); + + /// Replace G with a thunk or an alias to F. Deletes G. + void writeThunkOrAlias(Function *F, Function *G); + + /// Replace G with a simple tail call to bitcast(F). Also replace direct uses + /// of G with bitcast(F). Deletes G. + void writeThunk(Function *F, Function *G); + + /// Replace G with an alias to F. Deletes G. + void writeAlias(Function *F, Function *G); + + /// The set of all distinct functions. Use the insert() and remove() methods + /// to modify it. + FnSetType FnSet; + + /// TargetData for more accurate GEP comparisons. May be NULL. + TargetData *TD; + + /// Whether or not the target supports global aliases. 
+ bool HasGlobalAliases; +}; + +} // end anonymous namespace + +char MergeFunctions::ID = 0; +INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) + +ModulePass *llvm::createMergeFunctionsPass() { + return new MergeFunctions(); +} + +bool MergeFunctions::runOnModule(Module &M) { + bool Changed = false; + TD = getAnalysisIfAvailable<TargetData>(); + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) + Deferred.push_back(WeakVH(I)); + } + FnSet.resize(Deferred.size()); + + do { + std::vector<WeakVH> Worklist; + Deferred.swap(Worklist); + + DEBUG(dbgs() << "size of module: " << M.size() << '\n'); + DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); + + // Insert only strong functions and merge them. Strong function merging + // always deletes one of them. + for (std::vector<WeakVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + if (!*I) continue; + Function *F = cast<Function>(*I); + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && + !F->mayBeOverridden()) { + ComparableFunction CF = ComparableFunction(F, TD); + Changed |= insert(CF); + } + } + + // Insert only weak functions and merge them. By doing these second we + // create thunks to the strong function when possible. When two weak + // functions are identical, we create a new strong function with two weak + // weak thunks to it which are identical but not mergable. 
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + if (!*I) continue; + Function *F = cast<Function>(*I); + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && + F->mayBeOverridden()) { + ComparableFunction CF = ComparableFunction(F, TD); + Changed |= insert(CF); + } + } + DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n'); + } while (!Deferred.empty()); + + FnSet.clear(); + + return Changed; +} + +bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS, + const ComparableFunction &RHS) { + if (LHS.getFunc() == RHS.getFunc() && + LHS.getHash() == RHS.getHash()) + return true; + if (!LHS.getFunc() || !RHS.getFunc()) + return false; + + // One of these is a special "underlying pointer comparison only" object. + if (LHS.getTD() == ComparableFunction::LookupOnly || + RHS.getTD() == ComparableFunction::LookupOnly) + return false; + + assert(LHS.getTD() == RHS.getTD() && + "Comparing functions for different targets"); + + return FunctionComparator(LHS.getTD(), LHS.getFunc(), + RHS.getFunc()).compare(); +} + +// Replace direct callers of Old with New. +void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { + Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); + for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); + UI != UE;) { + Value::use_iterator TheIter = UI; + ++UI; + CallSite CS(*TheIter); + if (CS && CS.isCallee(TheIter)) { + remove(CS.getInstruction()->getParent()->getParent()); + TheIter.getUse().set(BitcastNew); + } + } +} + +// Replace G with an alias to F if possible, or else a thunk to F. Deletes G. +void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) { + if (HasGlobalAliases && G->hasUnnamedAddr()) { + if (G->hasExternalLinkage() || G->hasLocalLinkage() || + G->hasWeakLinkage()) { + writeAlias(F, G); + return; } } - // If G was internal then we may have replaced all uses if G with F. 
If so, + writeThunk(F, G); +} + +// Replace G with a simple tail call to bitcast(F). Also replace direct uses +// of G with bitcast(F). Deletes G. +void MergeFunctions::writeThunk(Function *F, Function *G) { + if (!G->mayBeOverridden()) { + // Redirect direct callers of G to F. + replaceDirectCallers(G, F); + } + + // If G was internal then we may have replaced all uses of G with F. If so, // stop here and delete G. There's no need for a thunk. if (G->hasLocalLinkage() && G->use_empty()) { G->eraseFromParent(); @@ -536,14 +729,14 @@ SmallVector<Value *, 16> Args; unsigned i = 0; - const FunctionType *FFTy = F->getFunctionType(); + FunctionType *FFTy = F->getFunctionType(); for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i))); ++i; } - CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end()); + CallInst *CI = Builder.CreateCall(F, Args); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); if (NewG->getReturnType()->isVoidTy()) { @@ -554,94 +747,126 @@ NewG->copyAttributesFrom(G); NewG->takeName(G); + removeUsers(G); G->replaceAllUsesWith(NewG); G->eraseFromParent(); + + DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n'); + ++NumThunksWritten; } -/// MergeTwoFunctions - Merge two equivalent functions. Upon completion, -/// FnVec[j] is deleted but not removed from the vector. -void MergeFunctions::MergeTwoFunctions(std::vector<Function *> &FnVec, - unsigned i, unsigned j) const { - Function *F = FnVec[i]; - Function *G = FnVec[j]; +// Replace G with an alias to F and delete G. 
+void MergeFunctions::writeAlias(Function *F, Function *G) { + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "", + BitcastF, G->getParent()); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + GA->takeName(G); + GA->setVisibility(G->getVisibility()); + removeUsers(G); + G->replaceAllUsesWith(GA); + G->eraseFromParent(); - if (F->isWeakForLinker() && !G->isWeakForLinker()) { - std::swap(FnVec[i], FnVec[j]); - std::swap(F, G); - } + DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n'); + ++NumAliasesWritten; +} - if (F->isWeakForLinker()) { - assert(G->isWeakForLinker()); +// Merge two equivalent functions. Upon completion, Function G is deleted. +void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { + if (F->mayBeOverridden()) { + assert(G->mayBeOverridden()); - // Make them both thunks to the same internal function. - Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", - F->getParent()); - H->copyAttributesFrom(F); - H->takeName(F); - F->replaceAllUsesWith(H); + if (HasGlobalAliases) { + // Make them both thunks to the same internal function. + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + removeUsers(F); + F->replaceAllUsesWith(H); - unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); + unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); - WriteThunk(F, G); - WriteThunk(F, H); + writeAlias(F, G); + writeAlias(F, H); - F->setAlignment(MaxAlignment); - F->setLinkage(GlobalValue::InternalLinkage); + F->setAlignment(MaxAlignment); + F->setLinkage(GlobalValue::PrivateLinkage); + } else { + // We can't merge them. Instead, pick one and update all direct callers + // to call it and hope that we improve the instruction cache hit rate. 
+ replaceDirectCallers(G, F); + } + + ++NumDoubleWeak; } else { - WriteThunk(F, G); + writeThunkOrAlias(F, G); } ++NumFunctionsMerged; } -/// PairwiseCompareAndMerge - Given a list of functions, compare each pair and -/// merge the pairs of equivalent functions. -bool MergeFunctions::PairwiseCompareAndMerge(std::vector<Function *> &FnVec) { - bool Changed = false; - for (int i = 0, e = FnVec.size(); i != e; ++i) { - for (int j = i + 1; j != e; ++j) { - bool isEqual = FunctionComparator(TD, FnVec[i], FnVec[j]).Compare(); +// Insert a ComparableFunction into the FnSet, or merge it away if equal to one +// that was already inserted. +bool MergeFunctions::insert(ComparableFunction &NewF) { + std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); + if (Result.second) { + DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n'); + return false; + } - DEBUG(dbgs() << " " << FnVec[i]->getName() - << (isEqual ? " == " : " != ") << FnVec[j]->getName() << "\n"); + const ComparableFunction &OldF = *Result.first; - if (isEqual) { - MergeTwoFunctions(FnVec, i, j); - Changed = true; - FnVec.erase(FnVec.begin() + j); - --j, --e; + // Never thunk a strong function to a weak function. + assert(!OldF.getFunc()->mayBeOverridden() || + NewF.getFunc()->mayBeOverridden()); + + DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " + << NewF.getFunc()->getName() << '\n'); + + Function *DeleteF = NewF.getFunc(); + NewF.release(); + mergeTwoFunctions(OldF.getFunc(), DeleteF); + return true; +} + +// Remove a function from FnSet. If it was already in FnSet, add it to Deferred +// so that we'll look at it in the next round. +void MergeFunctions::remove(Function *F) { + // We need to make sure we remove F, not a function "equal" to F per the + // function equality comparator. + // + // The special "lookup only" ComparableFunction bypasses the expensive + // function comparison in favour of a pointer comparison on the underlying + // Function*'s. 
+ ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly); + if (FnSet.erase(CF)) { + DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n"); + Deferred.push_back(F); + } +} + +// For each instruction used by the value, remove() the function that contains +// the instruction. This should happen right before a call to RAUW. +void MergeFunctions::removeUsers(Value *V) { + std::vector<Value *> Worklist; + Worklist.push_back(V); + while (!Worklist.empty()) { + Value *V = Worklist.back(); + Worklist.pop_back(); + + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + Use &U = UI.getUse(); + if (Instruction *I = dyn_cast<Instruction>(U.getUser())) { + remove(I->getParent()->getParent()); + } else if (isa<GlobalValue>(U.getUser())) { + // do nothing + } else if (Constant *C = dyn_cast<Constant>(U.getUser())) { + for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end(); + CUI != CUE; ++CUI) + Worklist.push_back(*CUI); } } } - return Changed; -} - -bool MergeFunctions::runOnModule(Module &M) { - bool Changed = false; - - std::map<unsigned long, std::vector<Function *> > FnMap; - - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) - continue; - - FnMap[ProfileFunction(F)].push_back(F); - } - - TD = getAnalysisIfAvailable<TargetData>(); - - bool LocalChanged; - do { - LocalChanged = false; - DEBUG(dbgs() << "size: " << FnMap.size() << "\n"); - for (std::map<unsigned long, std::vector<Function *> >::iterator - I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { - std::vector<Function *> &FnVec = I->second; - DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); - LocalChanged |= PairwiseCompareAndMerge(FnVec); - } - Changed |= LocalChanged; - } while (LocalChanged); - - return Changed; }
diff --git a/src/LLVM/lib/Transforms/IPO/PACKAGE.vcxproj b/src/LLVM/lib/Transforms/IPO/PACKAGE.vcxproj new file mode 100644 index 0000000..84f39a5 --- /dev/null +++ b/src/LLVM/lib/Transforms/IPO/PACKAGE.vcxproj
@@ -0,0 +1,277 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>PACKAGE</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> 
+ </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + 
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\IPO;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/IPO/PACKAGE.vcxproj.filters b/src/LLVM/lib/Transforms/IPO/PACKAGE.vcxproj.filters new file mode 100644 index 0000000..a570359 --- /dev/null +++ b/src/LLVM/lib/Transforms/IPO/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/IPO/PartialInlining.cpp b/src/LLVM/lib/Transforms/IPO/PartialInlining.cpp index eba7b3a..d9d1d10 100644 --- a/src/LLVM/lib/Transforms/IPO/PartialInlining.cpp +++ b/src/LLVM/lib/Transforms/IPO/PartialInlining.cpp
@@ -30,7 +30,9 @@ struct PartialInliner : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - PartialInliner() : ModulePass(ID) {} + PartialInliner() : ModulePass(ID) { + initializePartialInlinerPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module& M); @@ -41,7 +43,7 @@ char PartialInliner::ID = 0; INITIALIZE_PASS(PartialInliner, "partial-inliner", - "Partial Inliner", false, false); + "Partial Inliner", false, false) ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } @@ -67,8 +69,9 @@ return 0; // Clone the function, so that we can hack away on it. - ValueMap<const Value*, Value*> VMap; - Function* duplicateFunction = CloneFunction(F, VMap); + ValueToValueMapTy VMap; + Function* duplicateFunction = CloneFunction(F, VMap, + /*ModuleLevelChanges=*/false); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(duplicateFunction); BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); @@ -92,7 +95,7 @@ PHINode* OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) break; - PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins); + PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(retPhi); Ins = newReturnBlock->getFirstNonPHI();
diff --git a/src/LLVM/lib/Transforms/IPO/PassManagerBuilder.cpp b/src/LLVM/lib/Transforms/IPO/PassManagerBuilder.cpp new file mode 100644 index 0000000..8fdfd72 --- /dev/null +++ b/src/LLVM/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -0,0 +1,343 @@ +//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PassManagerBuilder class, which is used to set up a +// "standard" optimization sequence suitable for languages like C and C++. +// +//===----------------------------------------------------------------------===// + + +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +#include "llvm-c/Transforms/PassManagerBuilder.h" + +#include "llvm/PassManager.h" +#include "llvm/DefaultPasses.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +PassManagerBuilder::PassManagerBuilder() { + OptLevel = 2; + SizeLevel = 0; + LibraryInfo = 0; + Inliner = 0; + DisableSimplifyLibCalls = false; + DisableUnitAtATime = false; + DisableUnrollLoops = false; +} + +PassManagerBuilder::~PassManagerBuilder() { + delete LibraryInfo; + delete Inliner; +} + +/// Set of global extensions, automatically added as part of the standard set. 
+static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy, + PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions; + +void PassManagerBuilder::addGlobalExtension( + PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + GlobalExtensions->push_back(std::make_pair(Ty, Fn)); +} + +void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { + Extensions.push_back(std::make_pair(Ty, Fn)); +} + +void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, + PassManagerBase &PM) const { + for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i) + if ((*GlobalExtensions)[i].first == ETy) + (*GlobalExtensions)[i].second(*this, PM); + for (unsigned i = 0, e = Extensions.size(); i != e; ++i) + if (Extensions[i].first == ETy) + Extensions[i].second(*this, PM); +} + +void +PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const { + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); +} + +void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) { + addExtensionsToPM(EP_EarlyAsPossible, FPM); + + // Add LibraryInfo if we have some. + if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo)); + + if (OptLevel == 0) return; + + addInitialAliasAnalysisPasses(FPM); + + FPM.add(createCFGSimplificationPass()); + FPM.add(createScalarReplAggregatesPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); +} + +void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { + // If all optimizations are disabled, just run the always-inline pass. + if (OptLevel == 0) { + if (Inliner) { + MPM.add(Inliner); + Inliner = 0; + } + return; + } + + // Add LibraryInfo if we have some. 
+ if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + if (!DisableUnitAtATime) { + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + } + + // Start of CallGraph SCC passes. + if (!DisableUnitAtATime) + MPM.add(createPruneEHPass()); // Remove dead EH info + if (Inliner) { + MPM.add(Inliner); + Inliner = 0; + } + if (!DisableUnitAtATime) + MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + // Start of function pass. + // Break up aggregate allocas, using SSAUpdater. + MPM.add(createScalarReplAggregatesPass(-1, false)); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + if (!DisableSimplifyLibCalls) + MPM.add(createSimplifyLibCallsPass()); // Library Call Optimizations + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + MPM.add(createLoopRotatePass()); // Rotate Loop + MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createInstructionCombiningPass()); + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. 
+ MPM.add(createLoopDeletionPass()); // Delete dead loops + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); // Unroll small loops + addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + + if (OptLevel > 1) + MPM.add(createGVNPass()); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + MPM.add(createInstructionCombiningPass()); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + + addExtensionsToPM(EP_ScalarOptimizerLate, MPM); + + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Clean up after everything. + + if (!DisableUnitAtATime) { + // FIXME: We shouldn't bother with this anymore. + MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + + // GlobalOpt already deletes dead functions and globals, at -O3 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + if (OptLevel > 2) + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + + if (OptLevel > 1) + MPM.add(createConstantMergePass()); // Merge dup global constants + } +} + +void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, + bool Internalize, + bool RunInliner) { + // Provide AliasAnalysis services for optimizations. + addInitialAliasAnalysisPasses(PM); + + // Now that composite has been compiled, scan through the module, looking + // for a main function. If main is defined, mark all other functions + // internal. + if (Internalize) + PM.add(createInternalizePass(true)); + + // Propagate constants at call sites into the functions they call. 
This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + PM.add(createIPSCCPPass()); + + // Now that we internalized some globals, see if we can hack on them! + PM.add(createGlobalOptimizerPass()); + + // Linking modules together can lead to duplicated global constants, only + // keep one copy of each constant. + PM.add(createConstantMergePass()); + + // Remove unused arguments from functions. + PM.add(createDeadArgEliminationPass()); + + // Reduce the code after globalopt and ipsccp. Both can open up significant + // simplification opportunities, and both can propagate functions through + // function pointers. When this happens, we often have to resolve varargs + // calls, etc, so let instcombine do this. + PM.add(createInstructionCombiningPass()); + + // Inline small functions + if (RunInliner) + PM.add(createFunctionInliningPass()); + + PM.add(createPruneEHPass()); // Remove dead EH info. + + // Optimize globals again if we ran the inliner. + if (RunInliner) + PM.add(createGlobalOptimizerPass()); + PM.add(createGlobalDCEPass()); // Remove dead functions. + + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + PM.add(createArgumentPromotionPass()); + + // The IPO passes may leave cruft around. Clean up after them. + PM.add(createInstructionCombiningPass()); + PM.add(createJumpThreadingPass()); + // Break up allocas + PM.add(createScalarReplAggregatesPass()); + + // Run a few AA driven optimizations here and now, to cleanup the code. + PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createGlobalsModRefPass()); // IP alias analysis. + + PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createGVNPass()); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. + // Nuke dead stores. 
+ PM.add(createDeadStoreEliminationPass()); + + // Cleanup and simplify the code after the scalar optimizations. + PM.add(createInstructionCombiningPass()); + + PM.add(createJumpThreadingPass()); + + // Delete basic blocks, which optimization passes may have killed. + PM.add(createCFGSimplificationPass()); + + // Now that we have optimized the program, discard unreachable functions. + PM.add(createGlobalDCEPass()); +} + +LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void) { + PassManagerBuilder *PMB = new PassManagerBuilder(); + return wrap(PMB); +} + +void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) { + PassManagerBuilder *Builder = unwrap(PMB); + delete Builder; +} + +void +LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, + unsigned OptLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->OptLevel = OptLevel; +} + +void +LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, + unsigned SizeLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->SizeLevel = SizeLevel; +} + +void +LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableUnitAtATime = Value; +} + +void +LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableUnrollLoops = Value; +} + +void +LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableSimplifyLibCalls = Value; +} + +void +LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB, + unsigned Threshold) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->Inliner = createFunctionInliningPass(Threshold); +} + +void +LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder 
*Builder = unwrap(PMB); + FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM); + Builder->populateFunctionPassManager(*FPM); +} + +void +LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder *Builder = unwrap(PMB); + PassManagerBase *MPM = unwrap(PM); + Builder->populateModulePassManager(*MPM); +} + +void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM, + bool Internalize, + bool RunInliner) { + PassManagerBuilder *Builder = unwrap(PMB); + PassManagerBase *LPM = unwrap(PM); + Builder->populateLTOPassManager(*LPM, Internalize, RunInliner); +} +
diff --git a/src/LLVM/lib/Transforms/IPO/PruneEH.cpp b/src/LLVM/lib/Transforms/IPO/PruneEH.cpp index 09ac76f..cbb80f0 100644 --- a/src/LLVM/lib/Transforms/IPO/PruneEH.cpp +++ b/src/LLVM/lib/Transforms/IPO/PruneEH.cpp
@@ -27,7 +27,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" -#include <set> #include <algorithm> using namespace llvm; @@ -37,7 +36,9 @@ namespace { struct PruneEH : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - PruneEH() : CallGraphSCCPass(ID) {} + PruneEH() : CallGraphSCCPass(ID) { + initializePruneEHPass(*PassRegistry::getPassRegistry()); + } // runOnSCC - Analyze the SCC, performing the transformation if possible. bool runOnSCC(CallGraphSCC &SCC); @@ -48,8 +49,11 @@ } char PruneEH::ID = 0; -INITIALIZE_PASS(PruneEH, "prune-eh", - "Remove unused exception handling info", false, false); +INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_END(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) Pass *llvm::createPruneEHPass() { return new PruneEH(); } @@ -97,8 +101,9 @@ // Check to see if this function performs an unwind or calls an // unwinding function. for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) { - // Uses unwind! + if (CheckUnwind && (isa<UnwindInst>(BB->getTerminator()) || + isa<ResumeInst>(BB->getTerminator()))) { + // Uses unwind / resume! SCCMightUnwind = true; } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) { SCCMightReturn = true; @@ -171,11 +176,11 @@ if (II->doesNotThrow()) { SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); // Insert a call instruction before the invoke. 
- CallInst *Call = CallInst::Create(II->getCalledValue(), - Args.begin(), Args.end(), "", II); + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); Call->takeName(II); Call->setCallingConv(II->getCallingConv()); Call->setAttributes(II->getAttributes()); + Call->setDebugLoc(II->getDebugLoc()); // Anything that used the value produced by the invoke instruction // now uses the value produced by the call instruction. Note that we @@ -234,7 +239,7 @@ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) { --I; if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (!isa<DbgInfoIntrinsic>(I)) + if (!isa<IntrinsicInst>(I)) CGN->removeCallEdgeFor(CI); } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) CGN->removeCallEdgeFor(II);
diff --git a/src/LLVM/lib/Transforms/IPO/StripDeadPrototypes.cpp b/src/LLVM/lib/Transforms/IPO/StripDeadPrototypes.cpp index ee10ad0..b5f09ec 100644 --- a/src/LLVM/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/src/LLVM/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -29,7 +29,9 @@ class StripDeadPrototypesPass : public ModulePass { public: static char ID; // Pass identification, replacement for typeid - StripDeadPrototypesPass() : ModulePass(ID) { } + StripDeadPrototypesPass() : ModulePass(ID) { + initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); }; @@ -37,7 +39,7 @@ char StripDeadPrototypesPass::ID = 0; INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes", - "Strip Unused Function Prototypes", false, false); + "Strip Unused Function Prototypes", false, false) bool StripDeadPrototypesPass::runOnModule(Module &M) { bool MadeChange = false;
diff --git a/src/LLVM/lib/Transforms/IPO/StripSymbols.cpp b/src/LLVM/lib/Transforms/IPO/StripSymbols.cpp index c6c4fde..b5caa9a 100644 --- a/src/LLVM/lib/Transforms/IPO/StripSymbols.cpp +++ b/src/LLVM/lib/Transforms/IPO/StripSymbols.cpp
@@ -28,8 +28,8 @@ #include "llvm/Pass.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ValueSymbolTable.h" -#include "llvm/TypeSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -39,7 +39,9 @@ public: static char ID; // Pass identification, replacement for typeid explicit StripSymbols(bool ODI = false) - : ModulePass(ID), OnlyDebugInfo(ODI) {} + : ModulePass(ID), OnlyDebugInfo(ODI) { + initializeStripSymbolsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -52,7 +54,9 @@ public: static char ID; // Pass identification, replacement for typeid explicit StripNonDebugSymbols() - : ModulePass(ID) {} + : ModulePass(ID) { + initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -65,7 +69,9 @@ public: static char ID; // Pass identification, replacement for typeid explicit StripDebugDeclare() - : ModulePass(ID) {} + : ModulePass(ID) { + initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -78,7 +84,9 @@ public: static char ID; // Pass identification, replacement for typeid explicit StripDeadDebugInfo() - : ModulePass(ID) {} + : ModulePass(ID) { + initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnModule(Module &M); @@ -90,7 +98,7 @@ char StripSymbols::ID = 0; INITIALIZE_PASS(StripSymbols, "strip", - "Strip all symbols from a module", false, false); + "Strip all symbols from a module", false, false) ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { return new StripSymbols(OnlyDebugInfo); @@ -99,7 +107,7 @@ char StripNonDebugSymbols::ID = 0; INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug", "Strip all symbols, except dbg symbols, from a module", - false, false); + false, false) ModulePass *llvm::createStripNonDebugSymbolsPass() { return new StripNonDebugSymbols(); @@ 
-107,7 +115,7 @@ char StripDebugDeclare::ID = 0; INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare", - "Strip all llvm.dbg.declare intrinsics", false, false); + "Strip all llvm.dbg.declare intrinsics", false, false) ModulePass *llvm::createStripDebugDeclarePass() { return new StripDebugDeclare(); @@ -115,7 +123,7 @@ char StripDeadDebugInfo::ID = 0; INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info", - "Strip debug info for unused symbols", false, false); + "Strip debug info for unused symbols", false, false) ModulePass *llvm::createStripDeadDebugInfoPass() { return new StripDeadDebugInfo(); @@ -135,8 +143,7 @@ assert(C->use_empty() && "Constant is not dead!"); SmallPtrSet<Constant*, 4> Operands; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) - if (isa<DerivedType>(C->getOperand(i)->getType()) && - OnlyUsedBy(C->getOperand(i), C)) + if (OnlyUsedBy(C->getOperand(i), C)) Operands.insert(cast<Constant>(C->getOperand(i))); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { if (!GV->hasLocalLinkage()) return; // Don't delete non static globals. @@ -166,13 +173,19 @@ } } -// Strip the symbol table of its names. -static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) { - for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) { - if (PreserveDbgInfo && StringRef(TI->first).startswith("llvm.dbg")) - ++TI; - else - ST.remove(TI++); +// Strip any named types of their names. +static void StripTypeNames(Module &M, bool PreserveDbgInfo) { + std::vector<StructType*> StructTypes; + M.findUsedStructTypes(StructTypes); + + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { + StructType *STy = StructTypes[i]; + if (STy->isLiteral() || STy->getName().empty()) continue; + + if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg")) + continue; + + STy->setName(""); } } @@ -213,7 +226,7 @@ } // Remove all names from types. 
- StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo); + StripTypeNames(M, PreserveDbgInfo); return true; } @@ -350,8 +363,8 @@ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(), E = MDs.end(); I != E; ++I) { - if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(), - true)) { + GlobalVariable *GV = DIGlobalVariable(*I).getGlobal(); + if (GV && M.getGlobalVariable(GV->getName(), true)) { if (!NMD) NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); NMD->addOperand(*I);
diff --git a/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj b/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj new file mode 100644 index 0000000..e64a03c --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj
@@ -0,0 +1,261 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>INSTALL</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + 
<AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj.filters b/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj.filters new file mode 100644 index 0000000..251dd1d --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombine.h b/src/LLVM/lib/Transforms/InstCombine/InstCombine.h index e740248..3808278 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombine.h +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombine.h
@@ -11,6 +11,8 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/IRBuilder.h" @@ -20,6 +22,7 @@ namespace llvm { class CallSite; class TargetData; + class DbgDeclareInst; class MemIntrinsic; class MemSetInst; @@ -51,14 +54,14 @@ /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter - : public IRBuilderDefaultInserter { + : public IRBuilderDefaultInserter<true> { InstCombineWorklist &Worklist; public: InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} - void InsertHelper(Instruction *I, + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, BasicBlock::iterator InsertPt) const { - IRBuilderDefaultInserter::InsertHelper(I, BB, InsertPt); + IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); Worklist.Add(I); } }; @@ -68,7 +71,6 @@ : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; - bool MustPreserveLCSSA; bool MadeIRChange; public: /// Worklist - All of the instructions that need to be simplified. @@ -76,11 +78,13 @@ /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. 
- typedef IRBuilder<TargetFolder, InstCombineIRInserter> BuilderTy; + typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy; BuilderTy *Builder; static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {} + InstCombiner() : FunctionPass(ID), TD(0), Builder(0) { + initializeInstCombinerPass(*PassRegistry::getPassRegistry()); + } public: virtual bool runOnFunction(Function &F); @@ -100,7 +104,7 @@ // Instruction *visitAdd(BinaryOperator &I); Instruction *visitFAdd(BinaryOperator &I); - Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty); + Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty); Instruction *visitSub(BinaryOperator &I); Instruction *visitFSub(BinaryOperator &I); Instruction *visitMul(BinaryOperator &I); @@ -142,6 +146,8 @@ ConstantInt *RHS); Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); + Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, ICmpInst::Predicate Pred, Value *TheAdd); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, @@ -171,11 +177,14 @@ Instruction *visitSelectInst(SelectInst &SI); Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); Instruction *visitCallInst(CallInst &CI); + Instruction *visitInvokeInst(InvokeInst &II); Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); + Instruction *visitMalloc(Instruction &FI); + Instruction *visitFree(CallInst &FI); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Instruction *visitBranchInst(BranchInst &BI); @@ -184,15 +193,16 @@ Instruction *visitExtractElementInst(ExtractElementInst &EI); Instruction 
*visitShuffleVectorInst(ShuffleVectorInst &SVI); Instruction *visitExtractValueInst(ExtractValueInst &EV); + Instruction *visitLandingPadInst(LandingPadInst &LI); // visitInstruction - Specify what to return for unhandled instructions... Instruction *visitInstruction(Instruction &I) { return 0; } private: - bool ShouldChangeType(const Type *From, const Type *To) const; + bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V) const; - const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, + Type *FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); @@ -201,15 +211,17 @@ /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty); + Type *Ty); Instruction *visitCallSite(CallSite CS); + Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD); bool transformConstExprCastCall(CallSite CS); - Instruction *transformCallThroughTrampoline(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); + Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); - Value *EmitGEPOffset(User *GEP); public: @@ -224,7 +236,15 @@ Worklist.Add(New); return New; } - + + // InsertNewInstWith - same as InsertNewInstBefore, but also sets the + // debug loc. + // + Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) { + New->setDebugLoc(Old.getDebugLoc()); + return InsertNewInstBefore(New, Old); + } + // ReplaceInstUsesWith - This method is to be used when an instruction is // found to be dead, replacable with another preexisting expression. 
Here // we add all uses of I to the worklist, replace all uses of I with the new @@ -238,7 +258,10 @@ // segment of unreachable code, so just clobber the instruction. if (&I == V) V = UndefValue::get(I.getType()); - + + DEBUG(errs() << "IC: Replacing " << I << "\n" + " with " << *V << '\n'); + I.replaceAllUsesWith(V); return &I; } @@ -279,9 +302,16 @@ private: - /// SimplifyCommutative - This performs a few simplifications for - /// commutative operators. - bool SimplifyCommutative(BinaryOperator &I); + /// SimplifyAssociativeOrCommutative - This performs a few simplifications for + /// operators which are associative or commutative. + bool SimplifyAssociativeOrCommutative(BinaryOperator &I); + + /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations + /// which some other binary operation distributes over either by factorizing + /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this + /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is + /// a win). Returns the simplified value, or null if it didn't simplify. + Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. @@ -305,10 +335,7 @@ // into the PHI (which is only possible if all operands to the PHI are // constants). // - // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms - // that would normally be unprofitable because they strongly encourage jump - // threading. 
- Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); + Instruction *FoldOpIntoPhi(Instruction &I); // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" // operator and they all are only used by the PHI, PHI together their @@ -333,11 +360,7 @@ Instruction *SimplifyMemSet(MemSetInst *MI); - Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); - - unsigned GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign = 0); - + Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); };
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 262a855..d10046c 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -84,43 +84,37 @@ } Instruction *InstCombiner::visitAdd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), TD)) return ReplaceInstUsesWith(I, V); - - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) { - // X + (signbit) --> X ^ signbit - const APInt& Val = CI->getValue(); - uint32_t BitWidth = Val.getBitWidth(); - if (Val == APInt::getSignBit(BitWidth)) - return BinaryOperator::CreateXor(LHS, RHS); - - // See if SimplifyDemandedBits can simplify this. This handles stuff like - // (X & 254)+1 -> (X&254)|1 - if (SimplifyDemandedInstructionBits(I)) - return &I; + // (A*B)+(A*C) -> A*(B+C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); - // zext(bool) + C -> bool ? C + 1 : C - if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) - if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) - return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - } - - if (isa<PHINode>(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // X + (signbit) --> X ^ signbit + const APInt &Val = CI->getValue(); + if (Val.isSignBit()) + return BinaryOperator::CreateXor(LHS, RHS); - ConstantInt *XorRHS = 0; - Value *XorLHS = 0; - if (isa<ConstantInt>(RHSC) && - match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { + // See if SimplifyDemandedBits can simplify this. This handles stuff like + // (X & 254)+1 -> (X&254)|1 + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // zext(bool) + C -> bool ? 
C + 1 : C + if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) + if (ZI->getSrcTy()->isIntegerTy(1)) + return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); + + Value *XorLHS = 0; ConstantInt *XorRHS = 0; + if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); - const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); + const APInt &RHSVal = CI->getValue(); unsigned ExtendAmt = 0; // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. @@ -130,49 +124,43 @@ else if (XorRHS->getValue().isPowerOf2()) ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; } - + if (ExtendAmt) { APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); if (!MaskedValueIsZero(XorLHS, Mask)) ExtendAmt = 0; } - + if (ExtendAmt) { Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); - Value *NewShl = Builder->CreateShl(XorLHS, ShAmt); + Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); return BinaryOperator::CreateAShr(NewShl, ShAmt); } } } + if (isa<Constant>(RHS) && isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(LHS, RHS); - if (I.getType()->isIntegerTy()) { - // X + X --> X << 1 - if (LHS == RHS) - return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); - - if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) { - if (RHSI->getOpcode() == Instruction::Sub) - if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B - return ReplaceInstUsesWith(I, RHSI->getOperand(0)); - } - if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) { - if (LHSI->getOpcode() == Instruction::Sub) - if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B - return ReplaceInstUsesWith(I, LHSI->getOperand(0)); - } + // X + X --> X << 1 + if (LHS == RHS) { + BinaryOperator *New = + BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); + 
New->setHasNoSignedWrap(I.hasNoSignedWrap()); + New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return New; } // -A + B --> B - A // -A + -B --> -(A + B) if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVectorTy()) { - if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV); - return BinaryOperator::CreateNeg(NewAdd); - } + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + return BinaryOperator::CreateNeg(NewAdd); } return BinaryOperator::CreateSub(RHS, LHSV); @@ -199,13 +187,8 @@ if (dyn_castFoldableMul(RHS, C2) == LHS) return BinaryOperator::CreateMul(LHS, AddOne(C2)); - // X + ~X --> -1 since ~X = -X-1 - if (match(LHS, m_Not(m_Specific(RHS))) || - match(RHS, m_Not(m_Specific(LHS)))) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - // A+B --> A|B iff A and B have no bits set in common. - if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { + if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); @@ -222,7 +205,7 @@ } // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVectorTy()) { + { Value *W, *X, *Y, *Z; if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { @@ -238,7 +221,7 @@ } if (W == Y) { - Value *NewAdd = Builder->CreateAdd(X, Z); + Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); return BinaryOperator::CreateMul(W, NewAdd); } } @@ -251,24 +234,22 @@ // (X & FF00) + xx00 -> (X+xx00) & FF00 if (LHS->hasOneUse() && - match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = ConstantExpr::getAnd(CRHS, C2); - if (Anded == CRHS) { - // See if all bits from the first bit set in the Add RHS up are included - // in the mask. First, get the rightmost bit. 
- const APInt &AddRHSV = CRHS->getValue(); + match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) && + CRHS->getValue() == (CRHS->getValue() & C2->getValue())) { + // See if all bits from the first bit set in the Add RHS up are included + // in the mask. First, get the rightmost bit. + const APInt &AddRHSV = CRHS->getValue(); + + // Form a mask of all bits from the lowest bit added through the top. + APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); - // Form a mask of all bits from the lowest bit added through the top. - APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); + // See if the and mask includes all of these bits. + APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); - // See if the and mask includes all of these bits. - APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); - - if (AddRHSHighBits == AddRHSHighBitsAnd) { - // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = Builder->CreateAdd(X, CRHS); - return BinaryOperator::CreateAnd(NewAdd, C2); - } + if (AddRHSHighBits == AddRHSHighBitsAnd) { + // Okay, the xform is safe. Insert the new add pronto. + Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + return BinaryOperator::CreateAnd(NewAdd, C2); } } @@ -293,12 +274,11 @@ // Can we fold the add into the argument of the select? // We check both true and false select arguments for a matching subtract. - if (match(FV, m_Zero()) && - match(TV, m_Sub(m_Value(N), m_Specific(A)))) + if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the true select value. return SelectInst::Create(SI->getCondition(), N, A); - if (match(TV, m_Zero()) && - match(FV, m_Sub(m_Value(N), m_Specific(A)))) + + if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the false select value. return SelectInst::Create(SI->getCondition(), A, N); } @@ -316,7 +296,7 @@ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new, smaller add. 
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - CI); + CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -332,7 +312,7 @@ RHSConv->getOperand(0))) { // Insert the new integer add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0)); + RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -342,7 +322,7 @@ } Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (Constant *RHSC = dyn_cast<Constant>(RHS)) { @@ -389,7 +369,7 @@ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new integer add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - CI); + CI, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -405,7 +385,7 @@ RHSConv->getOperand(0))) { // Insert the new integer add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0)); + RHSConv->getOperand(0),"addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -421,9 +401,13 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { TargetData &TD = *getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); + Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); Value *Result = Constant::getNullValue(IntPtrTy); + // If the GEP is inbounds, we know that none of the addressing operations will + // overflow in an unsigned sense. + bool isInBounds = cast<GEPOperator>(GEP)->isInBounds(); + // Build a mask for high order bits. unsigned IntPtrWidth = TD.getPointerSizeInBits(); uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); @@ -436,33 +420,34 @@ if (OpC->isZero()) continue; // Handle a struct index, which adds its field offset to the pointer. 
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - Result = Builder->CreateAdd(Result, - ConstantInt::get(IntPtrTy, Size)); + if (Size) + Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"); continue; } Constant *Scale = ConstantInt::get(IntPtrTy, Size); Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale); + Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); // Emit an add instruction. - Result = Builder->CreateAdd(Result, Scale); + Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); continue; } // Convert to correct type. if (Op->getType() != IntPtrTy) - Op = Builder->CreateIntCast(Op, IntPtrTy, true); + Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); if (Size != 1) { - Constant *Scale = ConstantInt::get(IntPtrTy, Size); // We'll let instcombine(mul) convert this to a shl if possible. - Op = Builder->CreateMul(Op, Scale); + Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".idx", isInBounds /*NUW*/); } // Emit an add instruction. - Result = Builder->CreateAdd(Op, Result); + Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); } return Result; } @@ -475,7 +460,7 @@ /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. /// Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, - const Type *Ty) { + Type *Ty) { assert(TD && "Must have target data info for this"); // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize @@ -535,7 +520,7 @@ // If we have p - gep(p, ...) then we have to negate the result. 
if (Swapped) - Result = Builder->CreateNeg(Result); + Result = Builder->CreateNeg(Result, "diff.neg"); return Builder->CreateIntCast(Result, Ty, true); } @@ -544,8 +529,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Op0 == Op1) // sub X, X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), TD)) + return ReplaceInstUsesWith(I, V); + + // (A*B)-(A*C) -> A*(B-C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. if (Value *V = dyn_castNegVal(Op1)) { @@ -555,18 +545,14 @@ return Res; } - if (isa<UndefValue>(Op0)) - return ReplaceInstUsesWith(I, Op0); // undef - X -> undef - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); // X - undef -> undef if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(Op0, Op1); + + // Replace (-1 - A) with (~A). + if (match(Op0, m_AllOnes())) + return BinaryOperator::CreateNot(Op1); if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { - // Replace (-1 - A) with (~A). - if (C->isAllOnesValue()) - return BinaryOperator::CreateNot(Op1); - // C - ~X == X + (1+C) Value *X = 0; if (match(Op1, m_Not(m_Value(X)))) @@ -575,29 +561,16 @@ // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) if (C->isZero()) { - if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) { - if (SI->getOpcode() == Instruction::LShr) { - if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert AShr. 
- return BinaryOperator::Create(Instruction::AShr, - SI->getOperand(0), CU); - } - } - } else if (SI->getOpcode() == Instruction::AShr) { - if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert LShr. - return BinaryOperator::CreateLShr( - SI->getOperand(0), CU); - } - } - } - } + Value *X; ConstantInt *CI; + if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) && + // Verify we are shifting out everything but the sign bit. + CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) + return BinaryOperator::CreateAShr(X, CI); + + if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) && + // Verify we are shifting out everything but the sign bit. + CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) + return BinaryOperator::CreateLShr(X, CI); } // Try to fold constant sub into select arguments. @@ -607,83 +580,80 @@ // C - zext(bool) -> bool ? 
C - 1 : C if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) - if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) + if (ZI->getSrcTy()->isIntegerTy(1)) return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); + + // C-(X+C2) --> (C-C2)-X + ConstantInt *C2; + if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) + return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); } - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op1I->getOpcode() == Instruction::Add) { - if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(1)); - else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(0)); - else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) { - if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1))) - // C1-(X+C2) --> (C1-C2)-X - return BinaryOperator::CreateSub( - ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); - } - } - - if (Op1I->hasOneUse()) { - // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression - // is not used by anyone else... - // - if (Op1I->getOpcode() == Instruction::Sub) { - // Swap the two operands of the subexpr... - Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); - Op1I->setOperand(0, IIOp1); - Op1I->setOperand(1, IIOp0); - - // Create the new top level add instruction... - return BinaryOperator::CreateAdd(Op0, Op1); - } - - // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... 
- // - if (Op1I->getOpcode() == Instruction::And && - (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { - Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); - - Value *NewNot = Builder->CreateNot(OtherOp); - return BinaryOperator::CreateAnd(Op0, NewNot); - } - - // 0 - (X sdiv C) -> (X sdiv -C) - if (Op1I->getOpcode() == Instruction::SDiv) - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) - if (CSI->isZero()) - if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1))) - return BinaryOperator::CreateSDiv(Op1I->getOperand(0), - ConstantExpr::getNeg(DivRHS)); - - // 0 - (C << X) -> (-C << X) - if (Op1I->getOpcode() == Instruction::Shl) - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) - if (CSI->isZero()) - if (Value *ShlLHSNeg = dyn_castNegVal(Op1I->getOperand(0))) - return BinaryOperator::CreateShl(ShlLHSNeg, Op1I->getOperand(1)); - - // X - X*C --> X * (1-C) - ConstantInt *C2 = 0; - if (dyn_castFoldableMul(Op1I, C2) == Op0) { - Constant *CP1 = - ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), - C2); - return BinaryOperator::CreateMul(Op0, CP1); - } - } + + { Value *Y; + // X-(X+Y) == -Y X-(Y+X) == -Y + if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || + match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) + return BinaryOperator::CreateNeg(Y); + + // (X-Y)-X == -Y + if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) + return BinaryOperator::CreateNeg(Y); } + + if (Op1->hasOneUse()) { + Value *X = 0, *Y = 0, *Z = 0; + Constant *C = 0; + ConstantInt *CI = 0; - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (Op0I->getOpcode() == Instruction::Add) { - if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(1)); - else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(0)); - } else if (Op0I->getOpcode() == Instruction::Sub) { - if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y - return 
BinaryOperator::CreateNeg(Op0I->getOperand(1)); + // (X - (Y - Z)) --> (X + (Z - Y)). + if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) + return BinaryOperator::CreateAdd(Op0, + Builder->CreateSub(Z, Y, Op1->getName())); + + // (X - (X & Y)) --> (X & ~Y) + // + if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) || + match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) + return BinaryOperator::CreateAnd(Op0, + Builder->CreateNot(Y, Y->getName() + ".not")); + + // 0 - (X sdiv C) -> (X sdiv -C) + if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && + match(Op0, m_Zero())) + return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); + + // 0 - (X << Y) -> (-X << Y) when X is freely negatable. + if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero())) + if (Value *XNeg = dyn_castNegVal(X)) + return BinaryOperator::CreateShl(XNeg, Y); + + // X - X*C --> X * (1-C) + if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) { + Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI); + return BinaryOperator::CreateMul(Op0, CP1); + } + + // X - X<<C --> X * (1-(1<<C)) + if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) { + Constant *One = ConstantInt::get(I.getType(), 1); + C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); + return BinaryOperator::CreateMul(Op0, C); + } + + // X - A*-B -> X + A*B + // X - -A*B -> X + A*B + Value *A, *B; + if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || + match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) + return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); + + // X - A*CI -> X + A*-CI + // X - CI*A -> X + A*-CI + if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || + match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) { + Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); + return BinaryOperator::CreateAdd(Op0, NewMul); } }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a00337f..5e0bfe8 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Intrinsics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -172,7 +173,9 @@ case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break; case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break; case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break; - case 7: return ConstantInt::getTrue(LHS->getContext()); + case 7: + if (!isordered) return ConstantInt::getTrue(LHS->getContext()); + Pred = FCmpInst::FCMP_ORD; break; } return Builder->CreateFCmp(Pred, LHS, RHS); } @@ -207,15 +210,26 @@ } break; case Instruction::Or: - if (Together == AndRHS) // (X | C) & C --> C - return ReplaceInstUsesWith(TheAnd, AndRHS); - - if (Op->hasOneUse() && Together != OpRHS) { - // (X | C1) & C2 --> (X | (C1&C2)) & C2 - Value *Or = Builder->CreateOr(X, Together); - Or->takeName(Op); - return BinaryOperator::CreateAnd(Or, AndRHS); + if (Op->hasOneUse()){ + if (Together != OpRHS) { + // (X | C1) & C2 --> (X | (C1&C2)) & C2 + Value *Or = Builder->CreateOr(X, Together); + Or->takeName(Op); + return BinaryOperator::CreateAnd(Or, AndRHS); + } + + ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together); + if (TogetherCI && !TogetherCI->isZero()){ + // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1 + // NOTE: This reduces the number of bits set in the & mask, which + // can expose opportunities for store narrowing. 
+ Together = ConstantExpr::getXor(AndRHS, Together); + Value *And = Builder->CreateAnd(X, Together); + And->takeName(Op); + return BinaryOperator::CreateOr(And, OpRHS); + } } + break; case Instruction::Add: if (Op->hasOneUse()) { @@ -261,10 +275,11 @@ ConstantInt *CI = ConstantInt::get(AndRHS->getContext(), AndRHS->getValue() & ShlMask); - if (CI->getValue() == ShlMask) { - // Masking out bits that the shift already masks + if (CI->getValue() == ShlMask) + // Masking out bits that the shift already masks. return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. - } else if (CI != AndRHS) { // Reducing bits set in and. + + if (CI != AndRHS) { // Reducing bits set in and. TheAnd.setOperand(1, CI); return &TheAnd; } @@ -281,10 +296,11 @@ ConstantInt *CI = ConstantInt::get(Op->getContext(), AndRHS->getValue() & ShrMask); - if (CI->getValue() == ShrMask) { - // Masking out bits that the shift already masks. + if (CI->getValue() == ShrMask) + // Masking out bits that the shift already masks. return ReplaceInstUsesWith(TheAnd, Op); - } else if (CI != AndRHS) { + + if (CI != AndRHS) { TheAnd.setOperand(1, CI); // Reduce bits set in and cst. return &TheAnd; } @@ -304,8 +320,8 @@ // (Val ashr C1) & C2 -> (Val lshr C1) & C2 // Make the argument unsigned. Value *ShVal = Op->getOperand(0); - ShVal = Builder->CreateLShr(ShVal, OpRHS); - return BinaryOperator::CreateAnd(ShVal, AndRHS); + ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); + return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); } } break; @@ -315,7 +331,7 @@ /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient +/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient /// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. 
@@ -338,7 +354,7 @@ // Emit V-Lo <u Hi-Lo Constant *NegLo = ConstantExpr::getNeg(Lo); - Value *Add = Builder->CreateAdd(V, NegLo); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); return Builder->CreateICmpULT(Add, UpperBound); } @@ -357,7 +373,7 @@ // Emit V-Lo >u Hi-1-Lo // Note that Hi has already had one subtracted from it, above. ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); - Value *Add = Builder->CreateAdd(V, NegLo); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); return Builder->CreateICmpUGT(Add, LowerBound); } @@ -430,8 +446,272 @@ } if (isSub) - return Builder->CreateSub(LHSI->getOperand(0), RHS); - return Builder->CreateAdd(LHSI->getOperand(0), RHS); + return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); + return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); +} + +/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C) +/// One of A and B is considered the mask, the other the value. This is +/// described as the "AMask" or "BMask" part of the enum. If the enum +/// contains only "Mask", then both A and B can be considered masks. +/// If A is the mask, then it was proven, that (A & C) == C. This +/// is trivial if C == A, or C == 0. If both A and C are constants, this +/// proof is also easy. +/// For the following explanations we assume that A is the mask. +/// The part "AllOnes" declares, that the comparison is true only +/// if (A & B) == A, or all bits of A are set in B. +/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes +/// The part "AllZeroes" declares, that the comparison is true only +/// if (A & B) == 0, or all bits of A are cleared in B. +/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes +/// The part "Mixed" declares, that (A & B) == C and C might or might not +/// contain any number of one bits and zero bits. 
+/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed +/// The Part "Not" means, that in above descriptions "==" should be replaced +/// by "!=". +/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes +/// If the mask A contains a single bit, then the following is equivalent: +/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0) +/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0) +enum MaskedICmpType { + FoldMskICmp_AMask_AllOnes = 1, + FoldMskICmp_AMask_NotAllOnes = 2, + FoldMskICmp_BMask_AllOnes = 4, + FoldMskICmp_BMask_NotAllOnes = 8, + FoldMskICmp_Mask_AllZeroes = 16, + FoldMskICmp_Mask_NotAllZeroes = 32, + FoldMskICmp_AMask_Mixed = 64, + FoldMskICmp_AMask_NotMixed = 128, + FoldMskICmp_BMask_Mixed = 256, + FoldMskICmp_BMask_NotMixed = 512 +}; + +/// return the set of pattern classes (from MaskedICmpType) +/// that (icmp SCC (A & B), C) satisfies +static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, + ICmpInst::Predicate SCC) +{ + ConstantInt *ACst = dyn_cast<ConstantInt>(A); + ConstantInt *BCst = dyn_cast<ConstantInt>(B); + ConstantInt *CCst = dyn_cast<ConstantInt>(C); + bool icmp_eq = (SCC == ICmpInst::ICMP_EQ); + bool icmp_abit = (ACst != 0 && !ACst->isZero() && + ACst->getValue().isPowerOf2()); + bool icmp_bbit = (BCst != 0 && !BCst->isZero() && + BCst->getValue().isPowerOf2()); + unsigned result = 0; + if (CCst != 0 && CCst->isZero()) { + // if C is zero, then both A and B qualify as mask + result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_AMask_Mixed | + FoldMskICmp_BMask_Mixed) + : (FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_AMask_NotMixed | + FoldMskICmp_BMask_NotMixed)); + if (icmp_abit) + result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes | + FoldMskICmp_AMask_NotMixed) + : (FoldMskICmp_AMask_AllOnes | + FoldMskICmp_AMask_Mixed)); + if (icmp_bbit) + result |= (icmp_eq ? 
(FoldMskICmp_BMask_NotAllOnes | + FoldMskICmp_BMask_NotMixed) + : (FoldMskICmp_BMask_AllOnes | + FoldMskICmp_BMask_Mixed)); + return result; + } + if (A == C) { + result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes | + FoldMskICmp_AMask_Mixed) + : (FoldMskICmp_AMask_NotAllOnes | + FoldMskICmp_AMask_NotMixed)); + if (icmp_abit) + result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_AMask_NotMixed) + : (FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_AMask_Mixed)); + } + else if (ACst != 0 && CCst != 0 && + ConstantExpr::getAnd(ACst, CCst) == CCst) { + result |= (icmp_eq ? FoldMskICmp_AMask_Mixed + : FoldMskICmp_AMask_NotMixed); + } + if (B == C) + { + result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes | + FoldMskICmp_BMask_Mixed) + : (FoldMskICmp_BMask_NotAllOnes | + FoldMskICmp_BMask_NotMixed)); + if (icmp_bbit) + result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes | + FoldMskICmp_BMask_NotMixed) + : (FoldMskICmp_Mask_AllZeroes | + FoldMskICmp_BMask_Mixed)); + } + else if (BCst != 0 && CCst != 0 && + ConstantExpr::getAnd(BCst, CCst) == CCst) { + result |= (icmp_eq ? FoldMskICmp_BMask_Mixed + : FoldMskICmp_BMask_NotMixed); + } + return result; +} + +/// foldLogOpOfMaskedICmpsHelper: +/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) +/// return the set of pattern classes (from MaskedICmpType) +/// that both LHS and RHS satisfy +static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, + Value*& B, Value*& C, + Value*& D, Value*& E, + ICmpInst *LHS, ICmpInst *RHS) { + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0; + if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0; + if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0; + // vectors are not (yet?) 
supported + if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; + + // Here comes the tricky part: + // LHS might be of the form L11 & L12 == X, X == L21 & L22, + // and L11 & L12 == L21 & L22. The same goes for RHS. + // Now we must find those components L** and R**, that are equal, so + // that we can extract the parameters A, B, C, D, and E for the canonical + // above. + Value *L1 = LHS->getOperand(0); + Value *L2 = LHS->getOperand(1); + Value *L11,*L12,*L21,*L22; + if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { + if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) + L21 = L22 = 0; + } + else { + if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) + return 0; + std::swap(L1, L2); + L21 = L22 = 0; + } + + Value *R1 = RHS->getOperand(0); + Value *R2 = RHS->getOperand(1); + Value *R11,*R12; + bool ok = false; + if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { + if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { + A = R11; D = R12; E = R2; ok = true; + } + else + if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + A = R12; D = R11; E = R2; ok = true; + } + } + if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) { + if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { + A = R11; D = R12; E = R1; ok = true; + } + else + if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + A = R12; D = R11; E = R1; ok = true; + } + else + return 0; + } + if (!ok) + return 0; + + if (L11 == A) { + B = L12; C = L2; + } + else if (L12 == A) { + B = L11; C = L2; + } + else if (L21 == A) { + B = L22; C = L1; + } + else if (L22 == A) { + B = L21; C = L1; + } + + unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC); + unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC); + return left_type & right_type; +} +/// foldLogOpOfMaskedICmps: +/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) +/// into a single (icmp(A & X) ==/!= Y) +static Value* 
foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, + ICmpInst::Predicate NEWCC, + llvm::InstCombiner::BuilderTy* Builder) { + Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; + unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS); + if (mask == 0) return 0; + + if (NEWCC == ICmpInst::ICMP_NE) + mask >>= 1; // treat "Not"-states as normal states + + if (mask & FoldMskICmp_Mask_AllZeroes) { + // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) + // -> (icmp eq (A & (B|D)), 0) + Value* newOr = Builder->CreateOr(B, D); + Value* newAnd = Builder->CreateAnd(A, newOr); + // we can't use C as zero, because we might actually handle + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // with B and D, having a single bit set + Value* zero = Constant::getNullValue(A->getType()); + return Builder->CreateICmp(NEWCC, newAnd, zero); + } + else if (mask & FoldMskICmp_BMask_AllOnes) { + // (icmp eq (A & B), B) & (icmp eq (A & D), D) + // -> (icmp eq (A & (B|D)), (B|D)) + Value* newOr = Builder->CreateOr(B, D); + Value* newAnd = Builder->CreateAnd(A, newOr); + return Builder->CreateICmp(NEWCC, newAnd, newOr); + } + else if (mask & FoldMskICmp_AMask_AllOnes) { + // (icmp eq (A & B), A) & (icmp eq (A & D), A) + // -> (icmp eq (A & (B&D)), A) + Value* newAnd1 = Builder->CreateAnd(B, D); + Value* newAnd = Builder->CreateAnd(A, newAnd1); + return Builder->CreateICmp(NEWCC, newAnd, A); + } + else if (mask & FoldMskICmp_BMask_Mixed) { + // (icmp eq (A & B), C) & (icmp eq (A & D), E) + // We already know that B & C == C && D & E == E. + // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of + // C and E, which are shared by both the mask B and the mask D, don't + // contradict, then we can transform to + // -> (icmp eq (A & (B|D)), (C|E)) + // Currently, we only handle the case of B, C, D, and E being constant. 
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B); + if (BCst == 0) return 0; + ConstantInt *DCst = dyn_cast<ConstantInt>(D); + if (DCst == 0) return 0; + // we can't simply use C and E, because we might actually handle + // (icmp ne (A & B), B) & (icmp eq (A & D), D) + // with B and D, having a single bit set + + ConstantInt *CCst = dyn_cast<ConstantInt>(C); + if (CCst == 0) return 0; + if (LHS->getPredicate() != NEWCC) + CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) ); + ConstantInt *ECst = dyn_cast<ConstantInt>(E); + if (ECst == 0) return 0; + if (RHS->getPredicate() != NEWCC) + ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) ); + ConstantInt* MCst = dyn_cast<ConstantInt>( + ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst), + ConstantExpr::getXor(CCst, ECst)) ); + // if there is a conflict we should actually return a false for the + // whole construct + if (!MCst->isZero()) + return 0; + Value *newOr1 = Builder->CreateOr(B, D); + Value *newOr2 = ConstantExpr::getOr(CCst, ECst); + Value *newAnd = Builder->CreateAnd(A, newOr1); + return Builder->CreateICmp(NEWCC, newAnd, newOr2); + } + return 0; } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. @@ -451,6 +731,10 @@ return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } + + // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder)) + return V; // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); @@ -472,20 +756,52 @@ Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) --> - // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT - if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { - Value *Op1 = 0, *Op2 = 0; - ConstantInt *CI1 = 0, *CI2 = 0; - if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && - match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { - if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && - CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { - Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); - Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); - return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr); - } + + // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); + } + + // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + } + + // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 + // where CMAX is the all ones value for the truncated type, + // iff the lower bits of C2 and CA are zero. 
+ if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) && + LHS->hasOneUse() && RHS->hasOneUse()) { + Value *V; + ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; + + // (trunc x) == C1 & (and x, CA) == C2 + if (match(Val2, m_Trunc(m_Value(V))) && + match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = RHSCst; + BigCst = LHSCst; + } + // (and x, CA) == C2 & (trunc x) == C1 + else if (match(Val, m_Trunc(m_Value(V))) && + match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = LHSCst; + BigCst = RHSCst; + } + + if (SmallCst && BigCst) { + unsigned BigBitSize = BigCst->getType()->getBitWidth(); + unsigned SmallBitSize = SmallCst->getType()->getBitWidth(); + + // Check that the low bits are zero. + APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); + if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) { + Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue()); + APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue(); + Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N); + return Builder->CreateICmp(LHSCC, NewAnd, NewVal); } } } @@ -500,7 +816,17 @@ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) return 0; - + + // Make a constant range that's the intersection of the two icmp ranges. + // If the intersection is empty, we know that the result is false. + ConstantRange LHSRange = + ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue()); + ConstantRange RHSRange = + ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue()); + + if (LHSRange.intersectWith(RHSRange).isEmptySet()) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + // We can't fold (ugt x, C) & (sgt x, C2). 
if (!PredicatesFoldable(LHSCC, RHSCC)) return 0; @@ -533,10 +859,6 @@ case ICmpInst::ICMP_EQ: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false - case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 @@ -560,7 +882,7 @@ case ICmpInst::ICMP_NE: if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1)); } break; // (X != 13 & X != 15) -> no change @@ -584,9 +906,6 @@ case ICmpInst::ICMP_SLT: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false - case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 @@ -712,12 +1031,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyAndInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + // (A|B)&(A|C) -> A|(B&C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. 
if (SimplifyDemandedInstructionBits(I)) @@ -725,7 +1048,6 @@ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { const APInt &AndRHSMask = AndRHS->getValue(); - APInt NotAndRHS(~AndRHSMask); // Optimize a variety of ((val OP C1) & C2) combinations... if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { @@ -734,23 +1056,27 @@ switch (Op0I->getOpcode()) { default: break; case Instruction::Xor: - case Instruction::Or: + case Instruction::Or: { // If the mask is only needed on one incoming arm, push it up. if (!Op0I->hasOneUse()) break; + APInt NotAndRHS(~AndRHSMask); if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { // Not masking anything out for the LHS, move to RHS. - Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS); + Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); } if (!isa<Constant>(Op0RHS) && MaskedValueIsZero(Op0RHS, NotAndRHS)) { // Not masking anything out for the RHS, move to LHS. - Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS); + Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); } break; + } case Instruction::Add: // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 @@ -770,14 +1096,12 @@ // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS // has 1's for all bits that the subtraction with A might affect. - if (Op0I->hasOneUse()) { + if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) { uint32_t BitWidth = AndRHSMask.getBitWidth(); uint32_t Zeros = AndRHSMask.countLeadingZeros(); APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); - ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS); - if (!(A && A->isZero()) && // avoid infinite recursion. 
- MaskedValueIsZero(Op0LHS, Mask)) { + if (MaskedValueIsZero(Op0LHS, Mask)) { Value *NewNeg = Builder->CreateNeg(Op0RHS); return BinaryOperator::CreateAnd(NewNeg, AndRHS); } @@ -795,38 +1119,25 @@ } break; } - + if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) return Res; - } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) { - // If this is an integer truncation or change from signed-to-unsigned, and - // if the source is an and/or with immediate, transform it. This - // frequently occurs for bitfield accesses. - if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) { - if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) && - CastOp->getNumOperands() == 2) - if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){ - if (CastOp->getOpcode() == Instruction::And) { - // Change: and (cast (and X, C1) to T), C2 - // into : and (cast X to T), trunc_or_bitcast(C1)&C2 - // This will fold the two constants together, which may allow - // other simplifications. - Value *NewCast = Builder->CreateTruncOrBitCast( - CastOp->getOperand(0), I.getType()); - // trunc_or_bitcast(C1)&C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - C3 = ConstantExpr::getAnd(C3, AndRHS); - return BinaryOperator::CreateAnd(NewCast, C3); - } else if (CastOp->getOpcode() == Instruction::Or) { - // Change: and (cast (or X, C1) to T), C2 - // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) - // trunc(C1)&C2 - return ReplaceInstUsesWith(I, AndRHS); - } - } + } + + // If this is an integer truncation, and if the source is an 'and' with + // immediate, transform it. This frequently occurs for bitfield accesses. 
+ { + Value *X = 0; ConstantInt *YC = 0; + if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) { + // Change: and (trunc (and X, YC) to T), C2 + // into : and (trunc X to T), trunc(YC) & C2 + // This will fold the two constants together, which may allow + // other simplifications. + Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk"); + Constant *C3 = ConstantExpr::getTrunc(YC, I.getType()); + C3 = ConstantExpr::getAnd(C3, AndRHS); + return BinaryOperator::CreateAnd(NewCast, C3); } } @@ -844,10 +1155,11 @@ if (Value *Op0NotVal = dyn_castNotVal(Op0)) if (Value *Op1NotVal = dyn_castNotVal(Op1)) if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal); + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); return BinaryOperator::CreateNot(Or); } - + { Value *A = 0, *B = 0, *C = 0, *D = 0; // (A|B) & ~(A&B) -> A^B @@ -862,26 +1174,31 @@ ((A == C && B == D) || (A == D && B == C))) return BinaryOperator::CreateXor(A, B); - if (Op0->hasOneUse() && - match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1) { // (A^B)&A -> A&(A^B) - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } else if (B == Op1) { // (A^B)&B -> B&(B^A) - cast<BinaryOperator>(Op0)->swapOperands(); - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); + // A&(A^B) => A & ~B + { + Value *tmpOp0 = Op0; + Value *tmpOp1 = Op1; + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1 || B == Op1 ) { + tmpOp1 = Op0; + tmpOp0 = Op1; + // Simplify below + } } - } - if (Op1->hasOneUse() && - match(Op1, m_Xor(m_Value(A), m_Value(B)))) { - if (B == Op0) { // B&(A^B) -> B&(B^A) - cast<BinaryOperator>(Op1)->swapOperands(); - std::swap(A, B); + if (tmpOp1->hasOneUse() && + match(tmpOp1, m_Xor(m_Value(A), m_Value(B)))) { + if (B == tmpOp0) { + std::swap(A, B); + } + // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if + // A is originally -1 (or a vector of -1 
and undefs), then we enter + // an endless loop. By checking that A is non-constant we ensure that + // we will never get to the loop. + if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); } - if (A == Op0) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); } // (A&((~A)|B)) -> A&B @@ -908,7 +1225,7 @@ // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntOrIntVectorTy()) { @@ -917,7 +1234,7 @@ // Only do this if the casts both really cause code to be generated. if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { - Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp); + Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } @@ -944,7 +1261,8 @@ SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { Value *NewOp = - Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0)); + Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } @@ -1083,7 +1401,7 @@ /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. /// If so, insert the new bswap intrinsic and return it. Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { - const IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); + IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); if (!ITy || ITy->getBitWidth() % 16 || // ByteMask only allows up to 32-byte values. 
ITy->getBitWidth() > 32*8) @@ -1107,9 +1425,8 @@ for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) return 0; - const Type *Tys[] = { ITy }; Module *M = I.getParent()->getParent()->getParent(); - Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); return CallInst::Create(F, V); } @@ -1155,7 +1472,12 @@ return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } - + + // handle (roughly): + // (icmp ne (A & B), C) | (icmp ne (A & D), E) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder)) + return V; + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); @@ -1168,24 +1490,29 @@ Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) --> - // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT - if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { - Value *Op1 = 0, *Op2 = 0; - ConstantInt *CI1 = 0, *CI2 = 0; - if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && - match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { - if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && - CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { - Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); - Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); - return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr); - } - } + + // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + + // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && 
LHSCst->isAllOnesValue()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); } } - + + // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) + // iff C2 + CA == C1. + if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) { + ConstantInt *AddCst; + if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst)))) + if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue()) + return Builder->CreateICmpULE(Val, LHSCst); + } + // From here on, we only handle: // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. if (Val != Val2) return 0; @@ -1233,7 +1560,7 @@ if (LHSCst == SubOne(RHSCst)) { // (X == 13 | X == 14) -> X-13 <u 2 Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); return Builder->CreateICmpULT(Add, AddCST); } @@ -1424,12 +1751,16 @@ } Instruction *InstCombiner::visitOr(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyOrInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + // (A&B)|(A&C) -> A&(B|C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) @@ -1476,8 +1807,8 @@ // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. 
if (match(Op0, m_Or(m_Value(), m_Value())) || match(Op1, m_Or(m_Value(), m_Value())) || - (match(Op0, m_Shift(m_Value(), m_Value())) && - match(Op1, m_Shift(m_Value(), m_Value())))) { + (match(Op0, m_LogicalShift(m_Value(), m_Value())) && + match(Op1, m_LogicalShift(m_Value(), m_Value())))) { if (Instruction *BSwap = MatchBSwap(I)) return BSwap; } @@ -1504,7 +1835,7 @@ Value *C = 0, *D = 0; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { - Value *V1 = 0, *V2 = 0, *V3 = 0; + Value *V1 = 0, *V2 = 0; C1 = dyn_cast<ConstantInt>(C); C2 = dyn_cast<ConstantInt>(D); if (C1 && C2) { // (A & C1)|(B & C2) @@ -1555,32 +1886,13 @@ (C3->getValue() & ~C1->getValue()) == 0 && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && (C4->getValue() & ~C2->getValue()) == 0) { - V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4)); + V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); return BinaryOperator::CreateAnd(V2, ConstantInt::get(B->getContext(), C1->getValue()|C2->getValue())); } } } - - // Check to see if we have any common things being and'ed. If so, find the - // terms for V1 & (V2|V3). - if (Op0->hasOneUse() || Op1->hasOneUse()) { - V1 = 0; - if (A == B) // (A & C)|(A & D) == A & (C|D) - V1 = A, V2 = C, V3 = D; - else if (A == D) // (A & C)|(B & A) == A & (B|C) - V1 = A, V2 = B, V3 = C; - else if (C == B) // (A & C)|(C & D) == C & (A|D) - V1 = C, V2 = A, V3 = D; - else if (C == D) // (A & C)|(B & C) == C & (A|B) - V1 = C, V2 = A, V3 = B; - - if (V1) { - Value *Or = Builder->CreateOr(V2, V3); - return BinaryOperator::CreateAnd(V1, Or); - } - } // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants. 
// Don't do this for vector select idioms, the code generator doesn't handle @@ -1633,7 +1945,8 @@ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { - Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0)); + Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } @@ -1643,10 +1956,44 @@ if (Value *Op0NotVal = dyn_castNotVal(Op0)) if (Value *Op1NotVal = dyn_castNotVal(Op1)) if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal); + Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); return BinaryOperator::CreateNot(And); } + // Canonicalize xor to the RHS. + if (match(Op0, m_Xor(m_Value(), m_Value()))) + std::swap(Op0, Op1); + + // A | ( A ^ B) -> A | B + // A | (~A ^ B) -> A | ~B + if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) { + if (Op0 == A || Op0 == B) + return BinaryOperator::CreateOr(A, B); + + if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(B, B->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(A, A->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + } + + // A | ~(A | B) -> A | ~B + // A | ~(A ^ B) -> A | ~B + if (match(Op1, m_Not(m_Value(A)))) + if (BinaryOperator *B = dyn_cast<BinaryOperator>(A)) + if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) && + Op1->hasOneUse() && (B->getOpcode() == Instruction::Or || + B->getOpcode() == Instruction::Xor)) { + Value *NotOp = Op0 == B->getOperand(0) ? 
B->getOperand(1) : + B->getOperand(0); + Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) if (Value *Res = FoldOrOfICmps(LHS, RHS)) @@ -1660,65 +2007,76 @@ // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { - if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVectorTy()) { - Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + CastInst *Op1C = dyn_cast<CastInst>(Op1); + if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? + Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); - if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && - // Only do this if the casts both really cause code to be - // generated. - ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && - ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { - Value *NewOp = Builder->CreateOr(Op0COp, Op1COp); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - - // If this is or(cast(icmp), cast(icmp)), try to fold this even if the - // cast is otherwise not optimizable. This happens for vector sexts. - if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) - if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) - if (Value *Res = FoldOrOfICmps(LHS, RHS)) - return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); - - // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the - // cast is otherwise not optimizable. This happens for vector sexts. 
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) - if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) - if (Value *Res = FoldOrOfFCmps(LHS, RHS)) - return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && + // Only do this if the casts both really cause code to be + // generated. + ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); } + } + } + + // or(sext(A), B) -> A ? -1 : B where A is an i1 + // or(A, sext(B)) -> B ? -1 : A where B is an i1 + if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); + if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); + + // Note: If we've gotten to the point of visiting the outer OR, then the + // inner one couldn't be simplified. 
If it was a constant, then it won't + // be simplified by a later pass either, so we try swapping the inner/outer + // ORs in the hopes that we'll be able to simplify it this way. + // (X|C) | V --> (X|V) | C + if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) && + match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) { + Value *Inner = Builder->CreateOr(A, Op1); + Inner->takeName(Op0); + return BinaryOperator::CreateOr(Inner, C1); } return Changed ? &I : 0; } Instruction *InstCombiner::visitXor(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) { - if (isa<UndefValue>(Op0)) - // Handle undef ^ undef -> 0 special case. This is a common - // idiom (misuse). - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef - } + if (Value *V = SimplifyXorInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); - // xor X, X = 0 - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - + // (A&B)^(A&C) -> A&(B^C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; - if (I.getType()->isVectorTy()) - if (isa<ConstantAggregateZero>(Op1)) - return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X // Is this a ~ operation? 
if (Value *NotOp = dyn_castNotVal(&I)) { @@ -1731,7 +2089,8 @@ Op0I->swapOperands(); if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { Value *NotY = - Builder->CreateNot(Op0I->getOperand(1)); + Builder->CreateNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); if (Op0I->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(Op0NotVal, NotY); return BinaryOperator::CreateAnd(Op0NotVal, NotY); @@ -1742,9 +2101,9 @@ if (isFreeToInvert(Op0I->getOperand(0)) && isFreeToInvert(Op0I->getOperand(1))) { Value *NotX = - Builder->CreateNot(Op0I->getOperand(0)); + Builder->CreateNot(Op0I->getOperand(0), "notlhs"); Value *NotY = - Builder->CreateNot(Op0I->getOperand(1)); + Builder->CreateNot(Op0I->getOperand(1), "notrhs"); if (Op0I->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(NotX, NotY); return BinaryOperator::CreateAnd(NotX, NotY); @@ -1836,15 +2195,6 @@ return NV; } - if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 - if (X == Op1) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 - if (X == Op0) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); if (Op1I) { Value *A, *B; @@ -1857,10 +2207,6 @@ I.swapOperands(); // Simplified below. 
std::swap(Op0, Op1); } - } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // A^(A^B) == B - } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { - return ReplaceInstUsesWith(I, A); // A^(B^A) == B } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){ if (A == Op0) { // A^(A&B) -> A^(B&A) @@ -1883,10 +2229,6 @@ std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); - } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // (A^B)^A == B - } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { - return ReplaceInstUsesWith(I, A); // (B^A)^A == B } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ if (A == Op1) // (A&B)^A -> (B&A)^A @@ -1904,7 +2246,8 @@ Op0I->getOperand(1) == Op1I->getOperand(1) && (Op1I->hasOneUse() || Op1I->hasOneUse())) { Value *NewOp = - Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0)); + Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), + Op0I->getName()); return BinaryOperator::Create(Op1I->getOpcode(), NewOp, Op1I->getOperand(1)); } @@ -1923,29 +2266,8 @@ if ((A == C && B == D) || (A == D && B == C)) return BinaryOperator::CreateXor(A, B); } - - // (A & B)^(C & D) - if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && - match(Op0I, m_And(m_Value(A), m_Value(B))) && - match(Op1I, m_And(m_Value(C), m_Value(D)))) { - // (X & Y)^(X & Y) -> (Y^Z) & X - Value *X = 0, *Y = 0, *Z = 0; - if (A == C) - X = A, Y = B, Z = D; - else if (A == D) - X = A, Y = B, Z = C; - else if (B == C) - X = B, Y = A, Z = D; - else if (B == D) - X = B, Y = A, Z = C; - - if (X) { - Value *NewOp = Builder->CreateXor(Y, Z); - return BinaryOperator::CreateAnd(NewOp, X); - } - } } - + // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) @@ 
-1967,7 +2289,7 @@ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? - const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && // Only do this if the casts both really cause code to be generated. ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), @@ -1975,7 +2297,7 @@ ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), I.getType())) { Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), - Op1C->getOperand(0)); + Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineCalls.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7428478..eac794b 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,116 +12,27 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; /// getPromotedType - Return the specified type promoted as it would be to pass /// though a va_arg area. -static const Type *getPromotedType(const Type *Ty) { - if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { +static Type *getPromotedType(Type *Ty) { + if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { if (ITy->getBitWidth() < 32) return Type::getInt32Ty(Ty->getContext()); } return Ty; } -/// EnforceKnownAlignment - If the specified pointer points to an object that -/// we control, modify the object's alignment to PrefAlign. This isn't -/// often possible though. If alignment is important, a more reliable approach -/// is to simply align all global variables and allocation instructions to -/// their preferred alignment from the beginning. -/// -static unsigned EnforceKnownAlignment(Value *V, - unsigned Align, unsigned PrefAlign) { - - User *U = dyn_cast<User>(V); - if (!U) return Align; - - switch (Operator::getOpcode(U)) { - default: break; - case Instruction::BitCast: - return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - case Instruction::GetElementPtr: { - // If all indexes are zero, it is just the alignment of the base pointer. - bool AllZeroOperands = true; - for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) - if (!isa<Constant>(*i) || - !cast<Constant>(*i)->isNullValue()) { - AllZeroOperands = false; - break; - } - - if (AllZeroOperands) { - // Treat this like a bitcast. 
- return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - } - return Align; - } - case Instruction::Alloca: { - AllocaInst *AI = cast<AllocaInst>(V); - // If there is a requested alignment and if this is an alloca, round up. - if (AI->getAlignment() >= PrefAlign) - return AI->getAlignment(); - AI->setAlignment(PrefAlign); - return PrefAlign; - } - } - - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - // If there is a large requested alignment and we can, bump up the alignment - // of the global. - if (GV->isDeclaration()) return Align; - - if (GV->getAlignment() >= PrefAlign) - return GV->getAlignment(); - // We can only increase the alignment of the global if it has no alignment - // specified or if it is not assigned a section. If it is assigned a - // section, the global could be densely packed with other objects in the - // section, increasing the alignment could cause padding issues. - if (!GV->hasSection() || GV->getAlignment() == 0) - GV->setAlignment(PrefAlign); - return GV->getAlignment(); - } - - return Align; -} - -/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that -/// we can determine, return it, otherwise return 0. If PrefAlign is specified, -/// and it is more than the alignment of the ultimate object, see if we can -/// increase the alignment of the ultimate object, making this check succeed. -unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign) { - assert(V->getType()->isPointerTy() && - "GetOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; - APInt Mask = APInt::getAllOnesValue(BitWidth); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne); - unsigned TrailZ = KnownZero.countTrailingOnes(); - - // Avoid trouble with rediculously large TrailZ values, such as - // those computed from a null pointer. 
- TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - - unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); - - // LLVM doesn't support alignments larger than this currently. - Align = std::min(Align, +Value::MaximumAlignment); - - if (PrefAlign > Align) - Align = EnforceKnownAlignment(V, Align, PrefAlign); - - // We don't need to make any adjustment. - return Align; -} Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0)); - unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1)); + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -152,7 +63,7 @@ unsigned DstAddrSp = cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace(); - const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); + IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); @@ -164,18 +75,18 @@ // integer datatype. Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); if (StrippedDest != MI->getArgOperand(0)) { - const Type *SrcETy = cast<PointerType>(StrippedDest->getType()) + Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { // The SrcETy might be something like {{{double}}} or [1 x double]. Rip // down through these levels if so. 
while (!SrcETy->isSingleValueType()) { - if (const StructType *STy = dyn_cast<StructType>(SrcETy)) { + if (StructType *STy = dyn_cast<StructType>(SrcETy)) { if (STy->getNumElements() == 1) SrcETy = STy->getElementType(0); else break; - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { if (ATy->getNumElements() == 1) SrcETy = ATy->getElementType(); else @@ -199,10 +110,10 @@ Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); - Instruction *L = new LoadInst(Src, MI->isVolatile(), SrcAlign); - InsertNewInstBefore(L, *MI); - InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign), - *MI); + LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); + L->setAlignment(SrcAlign); + StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile()); + S->setAlignment(DstAlign); // Set the size of the copy to 0, it will be deleted on the next iteration. MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType())); @@ -210,7 +121,7 @@ } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); + unsigned Alignment = getKnownAlignment(MI->getDest(), TD); if (MI->getAlignment() < Alignment) { MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Alignment, false)); @@ -230,18 +141,21 @@ // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. + Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. 
Value *Dest = MI->getDest(); - Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); + unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); + Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); + Dest = Builder->CreateBitCast(Dest, NewDstPtrTy); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), - Dest, false, Alignment), *MI); + StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest, + MI->isVolatile()); + S->setAlignment(Alignment); // Set the size of the copy to 0, it will be deleted on the next iteration. MI->setLength(Constant::getNullValue(LenC->getType())); @@ -256,6 +170,19 @@ /// the heavy lifting. /// Instruction *InstCombiner::visitCallInst(CallInst &CI) { + if (isFreeCall(&CI)) + return visitFree(CI); + if (isMalloc(&CI)) + return visitMalloc(CI); + + // If the caller function is nounwind, mark the call as nounwind, even if the + // callee isn't. + if (CI.getParent()->getParent()->doesNotThrow() && + !CI.doesNotThrow()) { + CI.setDoesNotThrow(); + return &CI; + } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); if (!II) return visitCallSite(&CI); @@ -266,7 +193,8 @@ // memmove/cpy/set of zero bytes is a noop. if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { - if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); + if (NumBytes->isNullValue()) + return EraseInstFromFunction(CI); if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) if (CI->getZExtValue() == 1) { @@ -275,6 +203,10 @@ // alignment is sufficient. } } + + // No other transformations apply to volatile transfers. 
+ if (MI->isVolatile()) + return 0; // If we have a memmove and the source operation is a constant global, // then the source and dest pointers can't alias, so we can change this @@ -284,10 +216,10 @@ if (GVSrc->isConstant()) { Module *M = CI.getParent()->getParent()->getParent(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[3] = { CI.getArgOperand(0)->getType(), - CI.getArgOperand(1)->getType(), - CI.getArgOperand(2)->getType() }; - CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); + Type *Tys[3] = { CI.getArgOperand(0)->getType(), + CI.getArgOperand(1)->getType(), + CI.getArgOperand(2)->getType() }; + CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys)); Changed = true; } } @@ -317,73 +249,73 @@ // We need target data for just about everything so depend on it. if (!TD) break; - const Type *ReturnTy = CI.getType(); - bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); + Type *ReturnTy = CI.getType(); + uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL; // Get to the real allocated thing and offset as fast as possible. Value *Op1 = II->getArgOperand(0)->stripPointerCasts(); - + + uint64_t Offset = 0; + uint64_t Size = -1ULL; + + // Try to look through constant GEPs. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) { + if (!GEP->hasAllConstantIndices()) break; + + // Get the current byte offset into the thing. Use the original + // operand in case we're looking through a bitcast. + SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); + Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); + + Op1 = GEP->getPointerOperand()->stripPointerCasts(); + + // Make sure we're not a constant offset from an external + // global. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) + if (!GV->hasDefinitiveInitializer()) break; + } + // If we've stripped down to a single global variable that we // can know the size of then just return that. 
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { if (GV->hasDefinitiveInitializer()) { Constant *C = GV->getInitializer(); - uint64_t GlobalSize = TD->getTypeAllocSize(C->getType()); - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize)); + Size = TD->getTypeAllocSize(C->getType()); } else { // Can't determine size of the GV. - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow); return ReplaceInstUsesWith(CI, RetVal); } } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { // Get alloca size. if (AI->getAllocatedType()->isSized()) { - uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + Size = TD->getTypeAllocSize(AI->getAllocatedType()); if (AI->isArrayAllocation()) { const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize()); if (!C) break; - AllocaSize *= C->getZExtValue(); + Size *= C->getZExtValue(); } - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize)); } - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) { - // Only handle constant GEPs here. - if (CE->getOpcode() != Instruction::GetElementPtr) break; - GEPOperator *GEP = cast<GEPOperator>(CE); - - // Make sure we're not a constant offset from an external - // global. - Value *Operand = GEP->getPointerOperand(); - Operand = Operand->stripPointerCasts(); - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) - if (!GV->hasDefinitiveInitializer()) break; - - // Get what we're pointing to and its size. - const PointerType *BaseType = - cast<PointerType>(Operand->getType()); - uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType()); - - // Get the current byte offset into the thing. Use the original - // operand in case we're looking through a bitcast. 
- SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end()); - const PointerType *OffsetType = - cast<PointerType>(GEP->getPointerOperand()->getType()); - uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size()); - - if (Size < Offset) { - // Out of bound reference? Negative index normalized to large - // index? Just return "I don't know". - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); - return ReplaceInstUsesWith(CI, RetVal); - } - - Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); - return ReplaceInstUsesWith(CI, RetVal); - } + } else if (CallInst *MI = extractMallocCall(Op1)) { + // Get allocation size. + Type* MallocType = getMallocAllocatedType(MI); + if (MallocType && MallocType->isSized()) + if (Value *NElems = getMallocArraySize(MI, TD, true)) + if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) + Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType); + } // Do not return "I don't know" here. Later optimization passes could // make it possible to evaluate objectsize to a constant. - break; + if (Size == -1ULL) + break; + + if (Size < Offset) { + // Out of bound reference? Negative index normalized to large + // index? Just return "I don't know". + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow)); + } + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset)); } case Intrinsic::bswap: // bswap(bswap(x)) -> x @@ -421,7 +353,9 @@ case Intrinsic::cttz: { // If all bits below the first known one are known zero, // this value is constant. - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + // FIXME: Try to simplify vectors of integers. 
+ if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); @@ -438,7 +372,9 @@ case Intrinsic::ctlz: { // If all bits above the first known one are known zero, // this value is constant. - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + // FIXME: Try to simplify vectors of integers. + if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); @@ -454,7 +390,7 @@ break; case Intrinsic::uadd_with_overflow: { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt Mask = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0); @@ -472,25 +408,28 @@ if (LHSKnownNegative && RHSKnownNegative) { // The sign bit is set in both cases: this MUST overflow. // Create a simple add instruction, and insert it into the struct. - Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, &CI); - Worklist.Add(Add); + Value *Add = Builder->CreateAdd(LHS, RHS); + Add->takeName(&CI); Constant *V[] = { - UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext()) + UndefValue::get(LHS->getType()), + ConstantInt::getTrue(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } - + if (LHSKnownPositive && RHSKnownPositive) { // The sign bit is clear in both cases: this CANNOT overflow. // Create a simple add instruction, and insert it into the struct. 
- Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, &CI); - Worklist.Add(Add); + Value *Add = Builder->CreateNUWAdd(LHS, RHS); + Add->takeName(&CI); Constant *V[] = { UndefValue::get(LHS->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } } @@ -517,7 +456,8 @@ UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } @@ -537,12 +477,42 @@ UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; - case Intrinsic::umul_with_overflow: + case Intrinsic::umul_with_overflow: { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + + // Get the largest possible values for each operand. + APInt LHSMax = ~LHSKnownZero; + APInt RHSMax = ~RHSKnownZero; + + // If multiplying the maximum values does not overflow then we can turn + // this into a plain NUW mul. 
+ bool Overflow; + LHSMax.umul_ov(RHSMax, Overflow); + if (!Overflow) { + Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow"); + Constant *V[] = { + UndefValue::get(LHS->getType()), + Builder->getFalse() + }; + Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V); + return InsertValueInst::Create(Struct, Mul, 0); + } + } // FALL THROUGH case Intrinsic::smul_with_overflow: // Canonicalize constants into the RHS. if (isa<Constant>(II->getArgOperand(0)) && @@ -568,48 +538,52 @@ UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; //case Intrinsic::ppc_altivec_lvx: //case Intrinsic::ppc_altivec_lvxl: - case Intrinsic::x86_sse_loadu_ps: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse2_loadu_dq: - // Turn PPC lvx -> load if the pointer is known aligned. - // Turn X86 loadups -> load if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), - PointerType::getUnqual(II->getType())); - return new LoadInst(Ptr); - } - break; + // // Turn PPC lvx -> load if the pointer is known aligned. + // if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { + // Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + // PointerType::getUnqual(II->getType())); + // return new LoadInst(Ptr); + // } + // break; //case Intrinsic::ppc_altivec_stvx: //case Intrinsic::ppc_altivec_stvxl: - // Turn stvx -> store if the pointer is known aligned. 
- if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) { - const Type *OpPtrTy = - PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); - return new StoreInst(II->getArgOperand(0), Ptr); - } - break; + // // Turn stvx -> store if the pointer is known aligned. + // if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) { + // Type *OpPtrTy = + // PointerType::getUnqual(II->getArgOperand(0)->getType()); + // Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + // return new StoreInst(II->getArgOperand(0), Ptr); + // } + // break; case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { - const Type *OpPtrTy = + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); return new StoreInst(II->getArgOperand(1), Ptr); } break; - - case Intrinsic::x86_sse_cvttss2si: { - // These intrinsics only demands the 0th element of its input vector. If + + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: { + // These intrinsics only demand the 0th element of their input vectors. If // we can simplify the input based on that, do so now. unsigned VWidth = cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); @@ -622,56 +596,102 @@ } break; } - - //case Intrinsic::ppc_altivec_vperm: - // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. 
- if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { - assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); - - // Check that all of the elements are integer constants or undefs. - bool AllEltsOk = true; - for (unsigned i = 0; i != 16; ++i) { - if (!isa<ConstantInt>(Mask->getOperand(i)) && - !isa<UndefValue>(Mask->getOperand(i))) { - AllEltsOk = false; - break; - } - } - - if (AllEltsOk) { - // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), - Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), - Mask->getType()); - Value *Result = UndefValue::get(Op0->getType()); - - // Only extract each element once. - Value *ExtractedElts[32]; - memset(ExtractedElts, 0, sizeof(ExtractedElts)); - - for (unsigned i = 0; i != 16; ++i) { - if (isa<UndefValue>(Mask->getOperand(i))) - continue; - unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue(); - Idx &= 31; // Match the hardware behavior. - - if (ExtractedElts[Idx] == 0) { - ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, - ConstantInt::get(Type::getInt32Ty(II->getContext()), - Idx&15, false)); - } - - // Insert this value into the result vector. - Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - ConstantInt::get(Type::getInt32Ty(II->getContext()), - i, false)); - } - return CastInst::Create(Instruction::BitCast, Result, CI.getType()); - } + + + case Intrinsic::x86_sse41_pmovsxbw: + case Intrinsic::x86_sse41_pmovsxwd: + case Intrinsic::x86_sse41_pmovsxdq: + case Intrinsic::x86_sse41_pmovzxbw: + case Intrinsic::x86_sse41_pmovzxwd: + case Intrinsic::x86_sse41_pmovzxdq: { + // pmov{s|z}x ignores the upper half of their input vectors. 
+ unsigned VWidth = + cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); + unsigned LowHalfElts = VWidth / 2; + APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts)); + APInt UndefElts(VWidth, 0); + if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), + InputDemandedElts, + UndefElts)) { + II->setArgOperand(0, TmpV); + return II; } break; + } + + //case Intrinsic::ppc_altivec_vperm: + // // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. + // if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { + // assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); + // + // // Check that all of the elements are integer constants or undefs. + // bool AllEltsOk = true; + // for (unsigned i = 0; i != 16; ++i) { + // if (!isa<ConstantInt>(Mask->getOperand(i)) && + // !isa<UndefValue>(Mask->getOperand(i))) { + // AllEltsOk = false; + // break; + // } + // } + // + // if (AllEltsOk) { + // // Cast the input vectors to byte vectors. + // Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), + // Mask->getType()); + // Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), + // Mask->getType()); + // Value *Result = UndefValue::get(Op0->getType()); + // + // // Only extract each element once. + // Value *ExtractedElts[32]; + // memset(ExtractedElts, 0, sizeof(ExtractedElts)); + // + // for (unsigned i = 0; i != 16; ++i) { + // if (isa<UndefValue>(Mask->getOperand(i))) + // continue; + // unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue(); + // Idx &= 31; // Match the hardware behavior. + // + // if (ExtractedElts[Idx] == 0) { + // ExtractedElts[Idx] = + // Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + // Builder->getInt32(Idx&15)); + // } + // + // // Insert this value into the result vector. 
+ // Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], + // Builder->getInt32(i)); + // } + // return CastInst::Create(Instruction::BitCast, Result, CI.getType()); + // } + // } + // break; + + //case Intrinsic::arm_neon_vld1: + //case Intrinsic::arm_neon_vld2: + //case Intrinsic::arm_neon_vld3: + //case Intrinsic::arm_neon_vld4: + //case Intrinsic::arm_neon_vld2lane: + //case Intrinsic::arm_neon_vld3lane: + //case Intrinsic::arm_neon_vld4lane: + //case Intrinsic::arm_neon_vst1: + //case Intrinsic::arm_neon_vst2: + //case Intrinsic::arm_neon_vst3: + //case Intrinsic::arm_neon_vst4: + //case Intrinsic::arm_neon_vst2lane: + //case Intrinsic::arm_neon_vst3lane: + //case Intrinsic::arm_neon_vst4lane: { + // unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD); + // unsigned AlignArg = II->getNumArgOperands() - 1; + // ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg)); + // if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { + // II->setArgOperand(AlignArg, + // ConstantInt::get(Type::getInt32Ty(II->getContext()), + // MemAlign, false)); + // return II; + // } + // break; + //} case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can @@ -690,7 +710,7 @@ TerminatorInst *TI = II->getParent()->getTerminator(); bool CannotRemove = false; for (++BI; &*BI != TI; ++BI) { - if (isa<AllocaInst>(BI)) { + if (isa<AllocaInst>(BI) || isMalloc(BI)) { CannotRemove = true; break; } @@ -709,9 +729,11 @@ } } - // If the stack restore is in a return/unwind block and if there are no - // allocas or calls between the restore and the return, nuke the restore. - if (!CannotRemove && isa<ReturnInst>(TI)) + // If the stack restore is in a return, resume, or unwind block and if there + // are no allocas or calls between the restore and the return, nuke the + // restore. 
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) || + isa<UnwindInst>(TI))) return EraseInstFromFunction(CI); break; } @@ -720,6 +742,12 @@ return visitCallSite(II); } +// InvokeInst simplification +// +Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { + return visitCallSite(&II); +} + /// isSafeToEliminateVarargsCast - If this cast does not affect the value /// passed through the varargs area, we can eliminate the use of the cast. static bool isSafeToEliminateVarargsCast(const CallSite CS, @@ -735,9 +763,9 @@ if (!CS.paramHasAttr(ix, Attribute::ByVal)) return true; - const Type* SrcTy = + Type* SrcTy = cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); - const Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); + Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) return false; if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) @@ -745,16 +773,138 @@ return true; } +namespace { +class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls { + InstCombiner *IC; +protected: + void replaceCall(Value *With) { + NewInstruction = IC->ReplaceInstUsesWith(*CI, With); + } + bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { + if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) + return true; + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { + if (SizeCI->isAllOnesValue()) + return true; + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. 
+ if (Len == 0) return false; + return SizeCI->getZExtValue() >= Len; + } + if (ConstantInt *Arg = dyn_cast<ConstantInt>( + CI->getArgOperand(SizeArgOp))) + return SizeCI->getZExtValue() >= Arg->getZExtValue(); + } + return false; + } +public: + InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { } + Instruction *NewInstruction; +}; +} // end anonymous namespace + +// Try to fold some different type of calls here. +// Currently we're only working with the checking functions, memcpy_chk, +// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, +// strcat_chk and strncat_chk. +Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { + if (CI->getCalledFunction() == 0) return 0; + + InstCombineFortifiedLibCalls Simplifier(this); + Simplifier.fold(CI, TD); + return Simplifier.NewInstruction; +} + +static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { + // Strip off at most one level of pointer casts, looking for an alloca. This + // is good enough in practice and simpler than handling any number of casts. + Value *Underlying = TrampMem->stripPointerCasts(); + if (Underlying != TrampMem && + (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem)) + return 0; + if (!isa<AllocaInst>(Underlying)) + return 0; + + IntrinsicInst *InitTrampoline = 0; + for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end(); + I != E; I++) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I); + if (!II) + return 0; + if (II->getIntrinsicID() == Intrinsic::init_trampoline) { + if (InitTrampoline) + // More than one init_trampoline writes to this value. Give up. + return 0; + InitTrampoline = II; + continue; + } + if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) + // Allow any number of calls to adjust.trampoline. + continue; + return 0; + } + + // No call to init.trampoline found. 
+ if (!InitTrampoline) + return 0; + + // Check that the alloca is being used in the expected way. + if (InitTrampoline->getOperand(0) != TrampMem) + return 0; + + return InitTrampoline; +} + +static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, + Value *TrampMem) { + // Visit all the previous instructions in the basic block, and try to find a + // init.trampoline which has a direct path to the adjust.trampoline. + for (BasicBlock::iterator I = AdjustTramp, + E = AdjustTramp->getParent()->begin(); I != E; ) { + Instruction *Inst = --I; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::init_trampoline && + II->getOperand(0) == TrampMem) + return II; + if (Inst->mayWriteToMemory()) + return 0; + } + return 0; +} + +// Given a call to llvm.adjust.trampoline, find and return the corresponding +// call to llvm.init.trampoline if the call to the trampoline can be optimized +// to a direct call to a function. Otherwise return NULL. +// +static IntrinsicInst *FindInitTrampoline(Value *Callee) { + Callee = Callee->stripPointerCasts(); + IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); + if (!AdjustTramp || + AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) + return 0; + + Value *TrampMem = AdjustTramp->getOperand(0); + + if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem)) + return IT; + if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) + return IT; + return 0; +} + // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { bool Changed = false; - // If the callee is a constexpr cast of a function, attempt to move the cast - // to the arguments of the call/invoke. - if (transformConstExprCastCall(CS)) return 0; - + // If the callee is a pointer to a function, attempt to move any casts to the + // arguments of the call/invoke. 
Value *Callee = CS.getCalledValue(); + if (!isa<Function>(Callee) && transformConstExprCastCall(CS)) + return 0; if (Function *CalleeF = dyn_cast<Function>(Callee)) // If the call and callee calling conventions don't match, this call must @@ -771,10 +921,14 @@ // If OldCall dues not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!OldCall->getType()->isVoidTy()) - OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); + ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType())); if (isa<CallInst>(OldCall)) return EraseInstFromFunction(*OldCall); - + + // We cannot remove an invoke, because it would change the CFG, just + // change the callee to a null pointer. + cast<InvokeInst>(OldCall)->setCalledFunction( + Constant::getNullValue(CalleeF->getType())); return 0; } @@ -789,21 +943,24 @@ // If CS does not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!CS.getInstruction()->getType()->isVoidTy()) - CS.getInstruction()-> - replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType())); + ReplaceInstUsesWith(*CS.getInstruction(), + UndefValue::get(CS.getInstruction()->getType())); + if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { + // Don't break the CFG, insert a dummy cond branch. 
+ BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), + ConstantInt::getTrue(Callee->getContext()), II); + } return EraseInstFromFunction(*CS.getInstruction()); } - if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee)) - if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0))) - if (In->getIntrinsicID() == Intrinsic::init_trampoline) - return transformCallThroughTrampoline(CS); + if (IntrinsicInst *II = FindInitTrampoline(Callee)) + return transformCallThroughTrampoline(CS, II); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); if (FTy->isVarArg()) { - int ix = FTy->getNumParams() + (ISA_INVOKE_INST(Callee) ? 3 : 1); + int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1); // See if we can optimize any arguments passed through the varargs area of // the call. for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), @@ -815,7 +972,23 @@ } } } - + + if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) { + // Inline asm calls cannot throw - mark them 'nounwind'. + CS.setDoesNotThrow(); + Changed = true; + } + + // Try to optimize the call if possible, we require TargetData for most of + // this. None of these calls are seen as possibly dead so go ahead and + // delete the instruction now. + if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { + Instruction *I = tryOptimizeCall(CI, TD); + // If we changed something return the result, etc. Otherwise let + // the fallthrough check. + if (I) return EraseInstFromFunction(*I); + } + return Changed ? CS.getInstruction() : 0; } @@ -823,12 +996,10 @@ // attempt to move the cast to the arguments of the call/invoke. 
// bool InstCombiner::transformConstExprCastCall(CallSite CS) { - if (!isa<ConstantExpr>(CS.getCalledValue())) return false; - ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue()); - if (CE->getOpcode() != Instruction::BitCast || - !isa<Function>(CE->getOperand(0))) + Function *Callee = + dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + if (Callee == 0) return false; - Function *Callee = cast<Function>(CE->getOperand(0)); Instruction *Caller = CS.getInstruction(); const AttrListPtr &CallerPAL = CS.getAttributes(); @@ -836,9 +1007,9 @@ // would cause a type conversion of one of our arguments, change this call to // be a direct call with arguments casted to the appropriate types. // - const FunctionType *FT = Callee->getFunctionType(); - const Type *OldRetTy = Caller->getType(); - const Type *NewRetTy = FT->getReturnType(); + FunctionType *FT = Callee->getFunctionType(); + Type *OldRetTy = Caller->getType(); + Type *NewRetTy = FT->getReturnType(); if (NewRetTy->isStructTy()) return false; // TODO: Handle multiple return values. @@ -864,6 +1035,19 @@ if (RAttrs & Attribute::typeIncompatible(NewRetTy)) return false; // Attribute not compatible with transformed value. } + + // If the callsite is an invoke instruction, and the return value is used by + // a PHI node in a successor, we cannot change the return type of the call + // because there is no place to put the cast instruction (without breaking + // the critical edge). Bail out in this case. 
+ if (!Caller->use_empty()) + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) + for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); + UI != E; ++UI) + if (PHINode *PN = dyn_cast<PHINode>(*UI)) + if (PN->getParent() == II->getNormalDest() || + PN->getParent() == II->getUnwindDest()) + return false; } unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); @@ -871,15 +1055,28 @@ CallSite::arg_iterator AI = CS.arg_begin(); for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); - const Type *ActTy = (*AI)->getType(); + Type *ParamTy = FT->getParamType(i); + Type *ActTy = (*AI)->getType(); if (!CastInst::isCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. - if (CallerPAL.getParamAttributes(i + 1) - & Attribute::typeIncompatible(ParamTy)) + unsigned Attrs = CallerPAL.getParamAttributes(i + 1); + if (Attrs & Attribute::typeIncompatible(ParamTy)) return false; // Attribute not compatible with transformed value. + + // If the parameter is passed as a byval argument, then we have to have a + // sized type and the sized type has to have the same size as the old type. + if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) { + PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); + if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) + return false; + + Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); + if (TD->getTypeAllocSize(CurElTy) != + TD->getTypeAllocSize(ParamPTy->getElementType())) + return false; + } // Converting from one pointer type to another or between a pointer and an // integer of the same size is safe even if we do not have a body. @@ -891,10 +1088,19 @@ if (Callee->isDeclaration() && !isConvertible) return false; } - if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && - Callee->isDeclaration()) - return false; // Do not delete arguments unless we have a function body. 
+ if (Callee->isDeclaration()) { + // Do not delete arguments unless we have a function body. + if (FT->getNumParams() < NumActualArgs && !FT->isVarArg()) + return false; + // If the callee is just a declaration, don't change the varargsness of the + // call. We don't want to introduce a varargs call where one doesn't + // already exist. + PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType()); + if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg()) + return false; + } + if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && !CallerPAL.isEmpty()) // In this case we have more arguments than the new function type, but we @@ -908,8 +1114,9 @@ return false; } + // Okay, we decided that this is a safe thing to do: go ahead and start - // inserting cast instructions as necessary... + // inserting cast instructions as necessary. std::vector<Value*> Args; Args.reserve(NumActualArgs); SmallVector<AttributeWithIndex, 8> attrVec; @@ -928,7 +1135,7 @@ AI = CS.arg_begin(); for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); + Type *ParamTy = FT->getParamType(i); if ((*AI)->getType() == ParamTy) { Args.push_back(*AI); } else { @@ -955,7 +1162,7 @@ } else { // Add all of the arguments in their promoted form to the arg list. for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { - const Type *PTy = getPromotedType((*AI)->getType()); + Type *PTy = getPromotedType((*AI)->getType()); if (PTy != (*AI)->getType()) { // Must promote to pass through va_arg area! 
Instruction::CastOps opcode = @@ -981,44 +1188,64 @@ const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), attrVec.end()); - Instruction *NC = CallInst::Create(Callee, Args.begin(), Args.end(), - Caller->getName(), Caller); - CallInst *CI = cast<CallInst>(Caller); - if (CI->isTailCall()) - cast<CallInst>(NC)->setTailCall(); - cast<CallInst>(NC)->setCallingConv(CI->getCallingConv()); - cast<CallInst>(NC)->setAttributes(NewCallerPAL); + Instruction *NC; + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + NC = Builder->CreateInvoke(Callee, II->getNormalDest(), + II->getUnwindDest(), Args); + NC->takeName(II); + cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv()); + cast<InvokeInst>(NC)->setAttributes(NewCallerPAL); + } else { + CallInst *CI = cast<CallInst>(Caller); + NC = Builder->CreateCall(Callee, Args); + NC->takeName(CI); + if (CI->isTailCall()) + cast<CallInst>(NC)->setTailCall(); + cast<CallInst>(NC)->setCallingConv(CI->getCallingConv()); + cast<CallInst>(NC)->setAttributes(NewCallerPAL); + } // Insert a cast of the return type as necessary. Value *NV = NC; if (OldRetTy != NV->getType() && !Caller->use_empty()) { if (!NV->getType()->isVoidTy()) { - Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, - OldRetTy, false); + Instruction::CastOps opcode = + CastInst::getCastOpcode(NC, false, OldRetTy, false); NV = NC = CastInst::Create(opcode, NC, OldRetTy); + NC->setDebugLoc(Caller->getDebugLoc()); - InsertNewInstBefore(NC, *Caller); + // If this is an invoke instruction, we should insert it after the first + // non-phi, instruction in the normal successor block. + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt(); + InsertNewInstBefore(NC, *I); + } else { + // Otherwise, it's a call, just insert cast right after the call. 
+ InsertNewInstBefore(NC, *Caller); + } Worklist.AddUsersToWorkList(*Caller); } else { NV = UndefValue::get(Caller->getType()); } } - if (!Caller->use_empty()) - Caller->replaceAllUsesWith(NV); - + ReplaceInstUsesWith(*Caller, NV); + EraseInstFromFunction(*Caller); return true; } -// transformCallThroughTrampoline - Turn a call to a function created by the -// init_trampoline intrinsic into a direct call to the underlying function. +// transformCallThroughTrampoline - Turn a call to a function created by +// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the +// underlying function. // -Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { +Instruction * +InstCombiner::transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp) { Value *Callee = CS.getCalledValue(); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); const AttrListPtr &Attrs = CS.getAttributes(); // If the call already has the 'nest' attribute somewhere then give up - @@ -1026,17 +1253,17 @@ if (Attrs.hasAttrSomewhere(Attribute::Nest)) return 0; - IntrinsicInst *Tramp = - cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); + assert(Tramp && + "transformCallThroughTrampoline called with incorrect CallSite."); Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); - const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); - const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); + PointerType *NestFPTy = cast<PointerType>(NestF->getType()); + FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); const AttrListPtr &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; - const Type *NestTy = 0; + Type *NestTy = 0; Attributes 
NestAttr = Attribute::None; // Look for a parameter marked with the 'nest' attribute. @@ -1072,7 +1299,7 @@ // Add the chain argument and attributes. Value *NestVal = Tramp->getArgOperand(2); if (NestVal->getType() != NestTy) - NestVal = new BitCastInst(NestVal, NestTy, Caller); + NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); NewArgs.push_back(NestVal); NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); } @@ -1098,7 +1325,7 @@ // Handle this by synthesizing a new function type, equal to FTy // with the chain parameter inserted. - std::vector<const Type*> NewTypes; + std::vector<Type*> NewTypes; NewTypes.reserve(FTy->getNumParams()+1); // Insert the chain's type into the list of parameter types, which may @@ -1134,19 +1361,23 @@ const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), NewAttrs.end()); - Instruction *NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(), - Caller->getName(), Caller); - if (cast<CallInst>(Caller)->isTailCall()) - cast<CallInst>(NewCaller)->setTailCall(); - cast<CallInst>(NewCaller)-> - setCallingConv(cast<CallInst>(Caller)->getCallingConv()); - cast<CallInst>(NewCaller)->setAttributes(NewPAL); - - if (!Caller->getType()->isVoidTy()) - Caller->replaceAllUsesWith(NewCaller); - Caller->eraseFromParent(); - Worklist.Remove(Caller); - return 0; + Instruction *NewCaller; + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + NewCaller = InvokeInst::Create(NewCallee, + II->getNormalDest(), II->getUnwindDest(), + NewArgs); + cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); + cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); + } else { + NewCaller = CallInst::Create(NewCallee, NewArgs); + if (cast<CallInst>(Caller)->isTailCall()) + cast<CallInst>(NewCaller)->setTailCall(); + cast<CallInst>(NewCaller)-> + setCallingConv(cast<CallInst>(Caller)->getCallingConv()); + cast<CallInst>(NewCaller)->setAttributes(NewPAL); + } + + return NewCaller; } }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineCasts.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineCasts.cpp index c13fd2e..f10e48a 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; @@ -30,6 +31,14 @@ } if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { + // Cannot look past anything that might overflow. + OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val); + if (OBI && !OBI->hasNoUnsignedWrap()) { + Scale = 1; + Offset = 0; + return Val; + } + if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { if (I->getOpcode() == Instruction::Shl) { // This is a value scaled by '1 << the shift amt'. @@ -71,14 +80,14 @@ // This requires TargetData to get the alloca alignment and size information. if (!TD) return 0; - const PointerType *PTy = cast<PointerType>(CI.getType()); + PointerType *PTy = cast<PointerType>(CI.getType()); BuilderTy AllocaBuilder(*Builder); AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); // Get the type really allocated and the type casted to. - const Type *AllocElTy = AI.getAllocatedType(); - const Type *CastElTy = PTy->getElementType(); + Type *AllocElTy = AI.getAllocatedType(); + Type *CastElTy = PTy->getElementType(); if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); @@ -87,10 +96,8 @@ // If the allocation has multiple uses, only promote it if we are strictly // increasing the alignment of the resultant allocation. If we keep it the - // same, we open the door to infinite loops of various kinds. (A reference - // from a dbg.declare doesn't count as a use for this purpose.) - if (!AI.hasOneUse() && - CastElTyAlign == AllocElTyAlign) return 0; + // same, we open the door to infinite loops of various kinds. 
+ if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); @@ -134,8 +141,8 @@ if (!AI.hasOneUse()) { // New is the allocation instruction, pointer typed. AI is the original // allocation instruction, also pointer typed. Thus, cast to use is BitCast. - Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType()); - AI.replaceAllUsesWith(NewCast); + Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); + ReplaceInstUsesWith(AI, NewCast); } return ReplaceInstUsesWith(CI, New); } @@ -145,7 +152,7 @@ /// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); @@ -198,7 +205,7 @@ } case Instruction::PHI: { PHINode *OPN = cast<PHINode>(I); - PHINode *NPN = PHINode::Create(Ty); + PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); NPN->addIncoming(V, OPN->getIncomingBlock(i)); @@ -213,7 +220,7 @@ } Res->takeName(I); - return InsertNewInstBefore(Res, *I); + return InsertNewInstWith(Res, *I); } @@ -223,12 +230,12 @@ isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction - const Type *DstTy, ///< The target type for the second cast instruction + Type *DstTy, ///< The target type for the second cast instruction TargetData *TD ///< The target data for pointer size ) { - const Type *SrcTy = CI->getOperand(0)->getType(); // 
A from above - const Type *MidTy = CI->getType(); // B from above + Type *SrcTy = CI->getOperand(0)->getType(); // A from above + Type *MidTy = CI->getType(); // B from above // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); @@ -254,7 +261,7 @@ /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, - const Type *Ty) { + Type *Ty) { // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; @@ -318,7 +325,7 @@ /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateTruncated(Value *V, const Type *Ty) { +static bool CanEvaluateTruncated(Value *V, Type *Ty) { // We can always evaluate constants in another type. if (isa<Constant>(V)) return true; @@ -326,7 +333,7 @@ Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - const Type *OrigTy = V->getType(); + Type *OrigTy = V->getType(); // If this is an extension from the dest type, we can eliminate it, even if it // has multiple uses. @@ -391,6 +398,11 @@ case Instruction::Trunc: // trunc(trunc(x)) -> trunc(x) return true; + case Instruction::ZExt: + case Instruction::SExt: + // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest + // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest + return true; case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); return CanEvaluateTruncated(SI->getTrueValue(), Ty) && @@ -424,7 +436,7 @@ return &CI; Value *Src = CI.getOperand(0); - const Type *DestTy = CI.getType(), *SrcTy = Src->getType(); + Type *DestTy = CI.getType(), *SrcTy = Src->getType(); // Attempt to truncate the entire input expression tree to the destination // type. 
Only do this if the dest type is a simple type, don't convert the @@ -449,6 +461,39 @@ Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } + + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. + Value *A = 0; ConstantInt *Cst = 0; + if (Src->hasOneUse() && + match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { + // We have three types to worry about here, the type of A, the source of + // the truncate (MidSize), and the destination of the truncate. We know that + // ASize < MidSize and MidSize > ResultSize, but don't know the relation + // between ASize and ResultSize. + unsigned ASize = A->getType()->getPrimitiveSizeInBits(); + + // If the shift amount is larger than the size of A, then the result is + // known to be zero because all the input bits got shifted out. + if (Cst->getZExtValue() >= ASize) + return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType())); + + // Since we're doing an lshr and a zero extend, and know that the shift + // amount is smaller than ASize, it is always safe to do the shift in A's + // type, then zero extend or truncate to the result. + Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), false); + } + + // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest + // type isn't non-native. 
+ if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) && + ShouldChangeType(Src->getType(), CI.getType()) && + match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) { + Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr"); + return BinaryOperator::CreateAnd(NewTrunc, + ConstantExpr::getTrunc(Cst, CI.getType())); + } return 0; } @@ -472,13 +517,13 @@ Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits()-1); - In = Builder->CreateLShr(In, Sh); + In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); if (In->getType() != CI.getType()) In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One); + In = Builder->CreateXor(In, One, In->getName()+".not"); } return ReplaceInstUsesWith(CI, In); @@ -522,7 +567,8 @@ if (ShiftAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt)); + In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), + In->getName()+".lobit"); } if ((Op1CV != 0) == isNE) { // Toggle the low bit. @@ -532,8 +578,7 @@ if (CI.getType() == In->getType()) return ReplaceInstUsesWith(CI, In); - else - return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); + return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); } } } @@ -542,7 +587,7 @@ // It is also profitable to transform icmp eq into not(xor(A, B)) because that // may lead to additional simplifications. 
if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { - if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { + if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { uint32_t BitWidth = ITy->getBitWidth(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); @@ -600,7 +645,7 @@ /// clear the top bits anyway, doing this has no extra cost. /// /// This function works on both vectors and scalars. -static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) { +static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { BitsToClear = 0; if (isa<Constant>(V)) return true; @@ -714,7 +759,7 @@ return &CI; Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -766,7 +811,7 @@ if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); Constant *AndConst = ConstantInt::get(A->getType(), AndValue); - Value *And = Builder->CreateAnd(A, AndConst); + Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); return new ZExtInst(And, CI.getType()); } @@ -796,8 +841,8 @@ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { - Value *LCast = Builder->CreateZExt(LHS, CI.getType()); - Value *RCast = Builder->CreateZExt(RHS, CI.getType()); + Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); return BinaryOperator::Create(Instruction::Or, LCast, RCast); } } @@ -840,6 +885,102 @@ return 0; } +/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations +/// in order to eliminate the icmp. 
+Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { + Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); + ICmpInst::Predicate Pred = ICI->getPredicate(); + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative + // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive + if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) || + (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) { + + Value *Sh = ConstantInt::get(Op0->getType(), + Op0->getType()->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + if (In->getType() != CI.getType()) + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); + + if (Pred == ICmpInst::ICMP_SGT) + In = Builder->CreateNot(In, In->getName()+".not"); + return ReplaceInstUsesWith(CI, In); + } + + // If we know that only one bit of the LHS of the icmp can be set and we + // have an equality comparison with zero or a power of 2, we can transform + // the icmp and sext into bitwise/integer operations. + if (ICI->hasOneUse() && + ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ + unsigned BitWidth = Op1C->getType()->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne); + + APInt KnownZeroMask(~KnownZero); + if (KnownZeroMask.isPowerOf2()) { + Value *In = ICI->getOperand(0); + + // If the icmp tests for a known zero bit we can constant fold it. + if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) { + Value *V = Pred == ICmpInst::ICMP_NE ? 
+ ConstantInt::getAllOnesValue(CI.getType()) : + ConstantInt::getNullValue(CI.getType()); + return ReplaceInstUsesWith(CI, V); + } + + if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) { + // sext ((x & 2^n) == 0) -> (x >> n) - 1 + // sext ((x & 2^n) != 2^n) -> (x >> n) - 1 + unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); + // Perform a right shift to place the desired bit in the LSB. + if (ShiftAmt) + In = Builder->CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // At this point "In" is either 1 or 0. Subtract 1 to turn + // {1, 0} -> {0, -1}. + In = Builder->CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); + } else { + // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 + // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 + unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); + // Perform a left shift to place the desired bit in the MSB. + if (ShiftAmt) + In = Builder->CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // Distribute the bit over the whole bit width. + In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), + BitWidth - 1), "sext"); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/); + } + } + } + + // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. + if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) { + if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) && + Op0->getType() == CI.getType()) { + Type *EltTy = VTy->getElementType(); + + // splat the shift constant to a constant vector. 
+ Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit"); + return ReplaceInstUsesWith(CI, In); + } + } + + return 0; +} + /// CanEvaluateSExtd - Return true if we can take the specified value /// and return it as type Ty without inserting any new casts and without /// changing the value of the common low bits. This is used by code that tries @@ -848,7 +989,7 @@ /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateSExtd(Value *V, const Type *Ty) { +static bool CanEvaluateSExtd(Value *V, Type *Ty) { assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() && "Can't sign extend type to a smaller type"); // If this is a constant, it can be trivially promoted. @@ -923,7 +1064,7 @@ return &CI; Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -947,7 +1088,7 @@ // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt), + return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), ShAmt); } @@ -960,34 +1101,13 @@ // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); - Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt); + Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); return BinaryOperator::CreateAShr(Res, ShAmt); } - - - // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed - // (x >s -1) ? 
-1 : 0 -> ashr x, 31 -> all ones if not signed - { - ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS; - if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) { - // sext (x <s 0) to i32 --> x>>s31 true if signbit set. - // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. - if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) || - (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) { - Value *Sh = ConstantInt::get(CmpLHS->getType(), - CmpLHS->getType()->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(CmpLHS, Sh); - if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); - - if (Pred == ICmpInst::ICMP_SGT) - In = Builder->CreateNot(In); - return ReplaceInstUsesWith(CI, In); - } - } - } - - + + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) + return transformSExtICmp(ICI, CI); + // If the input is a shl/ashr pair of a same constant, then this is a sign // extension from a smaller value. If we could trust arbitrary bitwidth // integers, we could turn this into a truncate to the smaller bit and then @@ -1011,7 +1131,7 @@ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); - A = Builder->CreateShl(A, ShAmtV); + A = Builder->CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } @@ -1073,7 +1193,7 @@ case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - const Type *SrcTy = OpI->getType(); + Type *SrcTy = OpI->getType(); Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); if (LHSTrunc->getType() != SrcTy && @@ -1097,7 +1217,8 @@ CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); if (Call && Call->getCalledFunction() && Call->getCalledFunction()->getName() == "sqrt" && - Call->getNumArgOperands() == 1) { + Call->getNumArgOperands() 
== 1 && + Call->hasOneUse()) { CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); if (Arg && Arg->getOpcode() == Instruction::FPExt && CI.getType()->isFloatTy() && @@ -1106,7 +1227,7 @@ Arg->getOperand(0)->getType()->isFloatTy()) { Function *Callee = Call->getCalledFunction(); Module *M = CI.getParent()->getParent()->getParent(); - Constant* SqrtfFunc = M->getOrInsertFunction("sqrtf", + Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", Callee->getAttributes(), Builder->getFloatTy(), Builder->getFloatTy(), @@ -1114,6 +1235,11 @@ CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), "sqrtfcall"); ret->setAttributes(Callee->getAttributes()); + + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. + ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); + EraseInstFromFunction(*Call); return ret; } } @@ -1226,7 +1352,7 @@ // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); - const Type *GEPIdxTy = + Type *GEPIdxTy = cast<PointerType>(OrigBase->getType())->getElementType(); SmallVector<Value*, 8> NewIndices; if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) { @@ -1234,9 +1360,8 @@ // and bitcast the result. This eliminates one bitcast, potentially // two. Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, - NewIndices.begin(), NewIndices.end()) : - Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end()); + Builder->CreateInBoundsGEP(OrigBase, NewIndices) : + Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); if (isa<BitCastInst>(CI)) @@ -1275,12 +1400,12 @@ /// replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. 
-static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, +static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. - const VectorType *SrcTy = cast<VectorType>(InVal->getType()); + VectorType *SrcTy = cast<VectorType>(InVal->getType()); if (SrcTy->getElementType() != DestTy->getElementType()) { // The input types don't need to be identical, but for now they must be the @@ -1300,7 +1425,7 @@ // size of the input. SmallVector<Constant*, 16> ShuffleMask; Value *V2; - const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low @@ -1323,27 +1448,219 @@ ConstantInt::get(Int32Ty, SrcElts)); } - Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size()); - return new ShuffleVectorInst(InVal, V2, Mask); + return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask)); } +static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) { + return Value % Ty->getPrimitiveSizeInBits() == 0; +} + +static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { + return Value / Ty->getPrimitiveSizeInBits(); +} + +/// CollectInsertionElements - V is a value which is inserted into a vector of +/// VecEltTy. Look through the value to see if we can decompose it into +/// insertions into the vector. See the example in the comment for +/// OptimizeIntegerToVectorInsertions for the pattern this handles. +/// The type of V is always a non-zero multiple of VecEltTy's size. 
+/// +/// This returns false if the pattern can't be matched or true if it can, +/// filling in Elements with the elements found here. +static bool CollectInsertionElements(Value *V, unsigned ElementIndex, + SmallVectorImpl<Value*> &Elements, + Type *VecEltTy) { + // Undef values never contribute useful bits to the result. + if (isa<UndefValue>(V)) return true; + + // If we got down to a value of the right type, we win, try inserting into the + // right element. + if (V->getType() == VecEltTy) { + // Inserting null doesn't actually insert any elements. + if (Constant *C = dyn_cast<Constant>(V)) + if (C->isNullValue()) + return true; + + // Fail if multiple elements are inserted into this slot. + if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) + return false; + + Elements[ElementIndex] = V; + return true; + } + + if (Constant *C = dyn_cast<Constant>(V)) { + // Figure out the # elements this provides, and bitcast it or slice it up + // as required. + unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(), + VecEltTy); + // If the constant is the size of a vector element, we just need to bitcast + // it to the right type so it gets properly inserted. + if (NumElts == 1) + return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), + ElementIndex, Elements, VecEltTy); + + // Okay, this is a constant that covers multiple elements. Slice it up into + // pieces and insert each element-sized piece into the vector. 
+ if (!isa<IntegerType>(C->getType())) + C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(), + C->getType()->getPrimitiveSizeInBits())); + unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); + Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); + + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), + i*ElementSize)); + Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); + if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy)) + return false; + } + return true; + } + + if (!V->hasOneUse()) return false; + + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return false; + switch (I->getOpcode()) { + default: return false; // Unhandled case. + case Instruction::BitCast: + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy); + case Instruction::ZExt: + if (!isMultipleOfTypeSize( + I->getOperand(0)->getType()->getPrimitiveSizeInBits(), + VecEltTy)) + return false; + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy); + case Instruction::Or: + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy) && + CollectInsertionElements(I->getOperand(1), ElementIndex, + Elements, VecEltTy); + case Instruction::Shl: { + // Must be shifting by a constant that is a multiple of the element size. + ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; + unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); + + return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, + Elements, VecEltTy); + } + + } +} + + +/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we +/// may be doing shifts and ors to assemble the elements of the vector manually. 
+/// Try to rip the code out and replace it with insertelements. This is to +/// optimize code like this: +/// +/// %tmp37 = bitcast float %inc to i32 +/// %tmp38 = zext i32 %tmp37 to i64 +/// %tmp31 = bitcast float %inc5 to i32 +/// %tmp32 = zext i32 %tmp31 to i64 +/// %tmp33 = shl i64 %tmp32, 32 +/// %ins35 = or i64 %tmp33, %tmp38 +/// %tmp43 = bitcast i64 %ins35 to <2 x float> +/// +/// Into two insertelements that do "buildvector{%inc, %inc5}". +static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, + InstCombiner &IC) { + VectorType *DestVecTy = cast<VectorType>(CI.getType()); + Value *IntInput = CI.getOperand(0); + + SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); + if (!CollectInsertionElements(IntInput, 0, Elements, + DestVecTy->getElementType())) + return 0; + + // If we succeeded, we know that all of the element are specified by Elements + // or are zero if Elements has a null entry. Recast this as a set of + // insertions. + Value *Result = Constant::getNullValue(CI.getType()); + for (unsigned i = 0, e = Elements.size(); i != e; ++i) { + if (Elements[i] == 0) continue; // Unset element. + + Result = IC.Builder->CreateInsertElement(Result, Elements[i], + IC.Builder->getInt32(i)); + } + + return Result; +} + + +/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double +/// bitcast. The various long double bitcasts can't get in here. +static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ + Value *Src = CI.getOperand(0); + Type *DestTy = CI.getType(); + + // If this is a bitcast from int to float, check to see if the int is an + // extraction from a vector. 
+ Value *VecInput = 0; + // bitcast(trunc(bitcast(somevector))) + if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && + isa<VectorType>(VecInput->getType())) { + VectorType *VecTy = cast<VectorType>(VecInput->getType()); + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from. + if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + } + } + + // bitcast(trunc(lshr(bitcast(somevector), cst)) + ConstantInt *ShAmt = 0; + if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), + m_ConstantInt(ShAmt)))) && + isa<VectorType>(VecInput->getType())) { + VectorType *VecTy = cast<VectorType>(VecInput->getType()); + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && + ShAmt->getZExtValue() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from. + if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + unsigned Elt = ShAmt->getZExtValue() / DestWidth; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + } + } + return 0; +} Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, // otherwise just apply the common ones. 
Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(); - const Type *DestTy = CI.getType(); + Type *SrcTy = Src->getType(); + Type *DestTy = CI.getType(); // Get rid of casts from one type to the same type. These are useless and can // be replaced by the operand. if (DestTy == Src->getType()) return ReplaceInstUsesWith(CI, Src); - if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { - const PointerType *SrcPTy = cast<PointerType>(SrcTy); - const Type *DstElTy = DstPTy->getElementType(); - const Type *SrcElTy = SrcPTy->getElementType(); + if (PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { + PointerType *SrcPTy = cast<PointerType>(SrcTy); + Type *DstElTy = DstPTy->getElementType(); + Type *SrcElTy = SrcPTy->getElementType(); // If the address spaces don't match, don't eliminate the bitcast, which is // required for changing types. @@ -1374,12 +1691,16 @@ // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt); - return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), - ((Instruction*)NULL)); + return GetElementPtrInst::CreateInBounds(Src, Idxs); } } + + // Try to optimize int -> float bitcasts. + if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) + if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) + return I; - if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { + if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, @@ -1387,20 +1708,28 @@ // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - // If this is a cast from an integer to vector, check to see if the input - // is a trunc or zext of a bitcast from vector. 
If so, we can replace all - // the casts with a shuffle and (potentially) a bitcast. - if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){ - CastInst *SrcCast = cast<CastInst>(Src); - if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) - if (isa<VectorType>(BCIn->getOperand(0)->getType())) - if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), + if (isa<IntegerType>(SrcTy)) { + // If this is a cast from an integer to vector, check to see if the input + // is a trunc or zext of a bitcast from vector. If so, we can replace all + // the casts with a shuffle and (potentially) a bitcast. + if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) { + CastInst *SrcCast = cast<CastInst>(Src); + if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) + if (isa<VectorType>(BCIn->getOperand(0)->getType())) + if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), cast<VectorType>(DestTy), *this)) - return I; + return I; + } + + // If the input is an 'or' instruction, we may be doing shifts and ors to + // assemble the elements of the vector manually. Try to rip the code out + // and replace it with insertelements. + if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this)) + return ReplaceInstUsesWith(CI, V); } } - if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { + if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { Value *Elem = Builder->CreateExtractElement(Src,
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineCompares.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8fd3c58..bb1cbfa 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -13,7 +13,9 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" @@ -21,13 +23,17 @@ using namespace llvm; using namespace PatternMatch; +static ConstantInt *getOne(Constant *C) { + return ConstantInt::get(cast<IntegerType>(C->getType()), 1); +} + /// AddOne - Add one to a ConstantInt static Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C) { - return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); +static Constant *SubOne(Constant *C) { + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { @@ -37,13 +43,12 @@ static bool HasAddOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().slt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ult(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().sgt(In1->getValue()); + return Result->getValue().slt(In1->getValue()); } /// AddWithOverflow - Compute Result = In1+In2, returning true if the result @@ -52,7 +57,7 @@ Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getAdd(In1, In2); - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasAddOverflow(ExtractElement(Result, Idx), @@ -72,13 +77,13 @@ 
static bool HasSubOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().sgt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ugt(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().slt(In1->getValue()); + + return Result->getValue().sgt(In1->getValue()); } /// SubWithOverflow - Compute Result = In1-In2, returning true if the result @@ -87,7 +92,7 @@ Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getSub(In1, In2); - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasSubOverflow(ExtractElement(Result, Idx), @@ -123,9 +128,8 @@ case ICmpInst::ICMP_UGT: // True if LHS u> RHS and RHS == high-bit-mask - 1 TrueIfSigned = true; - return RHS->getValue() == - APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); - case ICmpInst::ICMP_UGE: + return RHS->isMaxValue(true); + case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; return RHS->getValue().isSignBit(); @@ -140,7 +144,7 @@ return (~CI->getValue() + 1).isPowerOf2(); } -/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a +/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a /// set of known zero and one bits, compute the maximum and minimum values that /// could have the specified known zero and known one bits, returning them in /// min/max. @@ -157,10 +161,10 @@ // bit if it is unknown. 
Min = KnownOne; Max = KnownOne|UnknownBits; - + if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.set(Min.getBitWidth()-1); - Max.clear(Max.getBitWidth()-1); + Min.setBit(Min.getBitWidth()-1); + Max.clearBit(Max.getBitWidth()-1); } } @@ -176,7 +180,7 @@ KnownZero.getBitWidth() == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); APInt UnknownBits = ~(KnownZero|KnownOne); - + // The minimum value is when the unknown bits are all zeros. Min = KnownOne; // The maximum value is when the unknown bits are all ones. @@ -198,10 +202,10 @@ CmpInst &ICI, ConstantInt *AndCst) { // We need TD information to know the pointer size unless this is inbounds. if (!GEP->isInBounds() && TD == 0) return 0; - + ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer()); if (Init == 0 || Init->getNumOperands() > 1024) return 0; - + // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. // @@ -216,31 +220,31 @@ // type they index. Collect the indices. This is typically for arrays of // structs. SmallVector<unsigned, 4> LaterIndices; - - const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); + + Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (Idx == 0) return 0; // Variable index. - + uint64_t IdxVal = Idx->getZExtValue(); if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. - - if (const StructType *STy = dyn_cast<StructType>(EltTy)) + + if (StructType *STy = dyn_cast<StructType>(EltTy)) EltTy = STy->getElementType(IdxVal); - else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { + else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { if (IdxVal >= ATy->getNumElements()) return 0; EltTy = ATy->getElementType(); } else { return 0; // Unknown type. 
} - + LaterIndices.push_back(IdxVal); } - + enum { Overdefined = -3, Undefined = -2 }; // Variables for our state machines. - + // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form // "i == 47 | i == 87", where 47 is the first index the condition is true for, // and 87 is the second (and last) index. FirstTrueElement is -2 when @@ -251,7 +255,7 @@ // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the // form "i != 47 & i != 87". Same state transitions as for true elements. int FirstFalseElement = Undefined, SecondFalseElement = Undefined; - + /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these /// define a state machine that triggers for ranges of values that the index /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. @@ -259,26 +263,25 @@ /// index in the range (inclusive). We use -2 for undefined here because we /// use relative comparisons and don't want 0-1 to match -1. int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; - + // MagicBitvector - This is a magic bitvector where we set a bit if the // comparison is true for element 'i'. If there are 64 elements or less in // the array, this will fully represent all the comparison results. uint64_t MagicBitvector = 0; - - + + // Scan the array and see if one of our patterns matches. Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { Constant *Elt = Init->getOperand(i); - + // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) - Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(), - LaterIndices.size()); - + Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); + // If the element is masked, handle it. if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); - + // Find out if the comparison would be true or false for the i'th element. 
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, CompareRHS, TD); @@ -292,15 +295,15 @@ FalseRangeEnd = i; continue; } - + // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. if (!isa<ConstantInt>(C)) return 0; - + // Otherwise, we know if the comparison is true or false for this element, // update our state machines. bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); - + // State machine for single/double/range index comparison. if (IsTrueForElt) { // Update the TrueElement state machine. @@ -312,7 +315,7 @@ SecondTrueElement = i; else SecondTrueElement = Overdefined; - + // Update range state machine. if (TrueRangeEnd == (int)i-1) TrueRangeEnd = i; @@ -329,7 +332,7 @@ SecondFalseElement = i; else SecondFalseElement = Overdefined; - + // Update range state machine. if (FalseRangeEnd == (int)i-1) FalseRangeEnd = i; @@ -337,12 +340,12 @@ FalseRangeEnd = Overdefined; } } - - + + // If this element is in range, update our magic bitvector. if (i < 64 && IsTrueForElt) MagicBitvector |= 1ULL << i; - + // If all of our states become overdefined, bail out early. Since the // predicate is expensive, only check it every 8 elements. This is only // really useful for really huge arrays. @@ -362,20 +365,20 @@ if (!GEP->isInBounds() && Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); - + // If the comparison is only true for one or two elements, emit direct // comparisons. if (SecondTrueElement != Overdefined) { // None true -> false. if (FirstTrueElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext())); - + Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); - + // True for one element -> 'i == 47'. 
if (SecondTrueElement == Undefined) return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); - + // True for two elements -> 'i == 47 | i == 72'. Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); @@ -389,36 +392,36 @@ // None false -> true. if (FirstFalseElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext())); - + Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); // False for one element -> 'i != 47'. if (SecondFalseElement == Undefined) return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); - + // False for two elements -> 'i != 47 & i != 72'. Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } - + // If the comparison can be replaced with a range comparison for the elements // where it is true, emit the range check. if (TrueRangeEnd != Overdefined) { assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); - + // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). if (FirstTrueElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), TrueRangeEnd-FirstTrueElement+1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } - + // False range check. 
if (FalseRangeEnd != Overdefined) { assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); @@ -427,19 +430,19 @@ Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), FalseRangeEnd-FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } - - + + // If a 32-bit or 64-bit magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 if (Init->getNumOperands() <= 32 || (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) { - const Type *Ty; + Type *Ty; if (Init->getNumOperands() <= 32) Ty = Type::getInt32Ty(Init->getContext()); else @@ -449,7 +452,7 @@ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); } - + return 0; } @@ -463,12 +466,11 @@ /// to generate the first by knowing that pointer arithmetic doesn't overflow. /// /// If we can't emit an optimized form for this expression, this returns null. -/// -static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, - InstCombiner &IC) { +/// +static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { TargetData &TD = *IC.getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - + // Check to see if this gep only has a single variable index. If so, and if // any constant indices are a multiple of its scale, then we can compute this // in terms of the scale of the variable index. For example, if the GEP @@ -480,9 +482,9 @@ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. 
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); @@ -493,33 +495,33 @@ break; } } - + // If there are no variable indices, we must have a constant offset, just // evaluate it the general way. if (i == e) return 0; - + Value *VariableIdx = GEP->getOperand(i); // Determine the scale factor of the variable element. For example, this is // 4 if the variable index is into an array of i32. uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); - + // Verify that there are no other variable indices. If so, emit the hard way. for (++i, ++GTI; i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (!CI) return 0; - + // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); Offset += Size*CI->getSExtValue(); } } - + // Okay, we know we have a single variable index, which must be a // pointer/array/vector index. If there is no offset, life is simple, return // the index. @@ -528,19 +530,20 @@ // Cast to intptrty in case a truncation occurs. If an extension is needed, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. 
- if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) - VariableIdx = new TruncInst(VariableIdx, - TD.getIntPtrType(VariableIdx->getContext()), &I); + if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); + } return VariableIdx; } - + // Otherwise, there is an index. The computation we will do will be modulo // the pointer size, so get it. uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - + Offset &= PtrSizeMask; VariableScale &= PtrSizeMask; - + // To do this transformation, any constant index must be a multiple of the // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a @@ -548,14 +551,14 @@ int64_t NewOffs = Offset / (int64_t)VariableScale; if (Offset != NewOffs*(int64_t)VariableScale) return 0; - + // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) - VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, - true /*SExt*/, &I); + VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, + true /*Signed*/); Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, &I); + return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); } /// FoldGEPICmp - Fold comparisons between a GEP instruction and something @@ -573,8 +576,8 @@ // This transformation (ignoring the base and scales) is valid because we // know pointers can't overflow since the gep is inbounds. See if we can // output an optimized form. 
- Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); - + Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); + // If not, synthesize the offset the hard way. if (Offset == 0) Offset = EmitGEPOffset(GEPLHS); @@ -627,6 +630,7 @@ if (AllZeros) return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); + bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { // If the GEPs only differ by one index, compare it. unsigned NumDifferences = 0; // Keep track of # differences. @@ -649,7 +653,7 @@ ConstantInt::get(Type::getInt1Ty(I.getContext()), ICmpInst::isTrueWhenEqual(Cond))); - else if (NumDifferences == 1) { + else if (NumDifferences == 1 && GEPsInBounds) { Value *LHSV = GEPLHS->getOperand(DiffOperand); Value *RHSV = GEPRHS->getOperand(DiffOperand); // Make sure we do a signed comparison here. @@ -660,6 +664,7 @@ // Only lower this if the icmp is the only user of the GEP or if we expect // the result to fold to a constant! if (TD && + GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) @@ -682,7 +687,7 @@ bool isTrue = ICmpInst::isTrueWhenEqual(Pred); return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); } - + // (X+4) == X -> false. if (Pred == ICmpInst::ICMP_EQ) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); @@ -691,40 +696,25 @@ if (Pred == ICmpInst::ICMP_NE) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); - // If this is an instruction (as opposed to constantexpr) get NUW/NSW info. 
- bool isNUW = false, isNSW = false; - if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) { - isNUW = Add->hasNoUnsignedWrap(); - isNSW = Add->hasNoSignedWrap(); - } - // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, - // so the values can never be equal. Similiarly for all other "or equals" + // so the values can never be equal. Similarly for all other "or equals" // operators. - + // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { - // If this is an NUW add, then this is always false. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); - - Value *R = + Value *R = ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); return new ICmpInst(ICmpInst::ICMP_UGT, X, R); } - + // (X+1) >u X --> X <u (0-1) --> X != 255 // (X+2) >u X --> X <u (0-2) --> X <u 254 // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 - if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { - // If this is an NUW add, then this is always true. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); - } - + unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); ConstantInt *SMax = ConstantInt::get(X->getContext(), APInt::getSignedMaxValue(BitWidth)); @@ -735,31 +725,16 @@ // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always false, if negative, this is always true. 
- if (isNSW) { - bool isTrue = CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); - } - + // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 - - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always true, if negative, this is always false. - if (isNSW) { - bool isTrue = !CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - + assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); @@ -771,15 +746,15 @@ ConstantInt *DivRHS) { ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); const APInt &CmpRHSV = CmpRHS->getValue(); - - // FIXME: If the operand types don't match the type of the divide + + // FIXME: If the operand types don't match the type of the divide // then don't attempt this transform. The code below doesn't have the // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) <s C2 produces different + // vice versa). This is because (x /s C1) <s C2 produces different // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even - // (x /u C1) <u C2. Simply casting the operands and result won't - // work. :( The if statement below tests that condition and bails - // if it finds it. + // (x /u C1) <u C2. Simply casting the operands and result won't + // work. 
:( The if statement below tests that condition and bails + // if it finds it. bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) return 0; @@ -787,27 +762,33 @@ return 0; // The ProdOV computation fails on divide by zero. if (DivIsSigned && DivRHS->isAllOnesValue()) return 0; // The overflow computation also screws up here - if (DivRHS->isOne()) - return 0; // Not worth bothering, and eliminates some funny cases - // with INT_MIN. + if (DivRHS->isOne()) { + // This eliminates some funny cases with INT_MIN. + ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. + return &ICI; + } // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI). By solving for X we can turn this into a range check - // instead of computing a divide. + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); // Determine if the product overflows by seeing if the product is // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. + // as in the LHS instruction that we're folding. bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; // Get the ICmp opcode ICmpInst::Predicate Pred = ICI.getPredicate(); + /// If the division is known to be exact, then there is no remainder from the + /// divide, so the covered range size is unit, otherwise it is the divisor. + ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; + // Figure out the interval that is being checked. For example, a comparison - // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). 
// Compute this interval based on the constants involved and the signedness of // the compare/divide. This computes a half-open interval, keeping track of // whether either value in the interval overflows. After analysis each @@ -815,38 +796,43 @@ // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. int LoOverflow = 0, HiOverflow = 0; Constant *LoBound = 0, *HiBound = 0; - + if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) LoBound = Prod; HiOverflow = LoOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false); + if (!HiOverflow) { + // If this is not an exact divide, then many values in the range collapse + // to the same result value. + HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); + } + } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) - LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS))); - HiBound = DivRHS; + LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); + HiBound = RangeSize; } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) HiOverflow = LoOverflow = ProdOV; if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true); + HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true); } else { // (X / pos) op neg // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) HiBound = AddOne(Prod); LoOverflow = HiOverflow = ProdOV ? -1 : 0; if (!LoOverflow) { - ConstantInt* DivNeg = - cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); + ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; - } + } } - } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. + } else if (DivRHS->isNegative()) { // Divisor is < 0. 
+ if (DivI->isExact()) + RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (CmpRHSV == 0) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) - LoBound = AddOne(DivRHS); - HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); + LoBound = AddOne(RangeSize); + HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN @@ -856,14 +842,14 @@ HiBound = AddOne(Prod); HiOverflow = LoOverflow = ProdOV ? -1 : 0; if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0; + LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; } else { // (X / neg) op neg LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) LoOverflow = HiOverflow = ProdOV; if (!HiOverflow) - HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true); + HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); } - + // Dividing by a negative swaps the condition. LT <-> GT Pred = ICmpInst::getSwappedPredicate(Pred); } @@ -880,9 +866,8 @@ if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); - return ReplaceInstUsesWith(ICI, - InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - true)); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); @@ -905,15 +890,102 @@ case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - else if (HiOverflow == -1) // High bound less than input range. + if (HiOverflow == -1) // High bound less than input range. 
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); } } +/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)". +Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, + ConstantInt *ShAmt) { + const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue(); + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + uint32_t TypeBits = CmpRHSV.getBitWidth(); + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + if (ShAmtVal >= TypeBits || ShAmtVal == 0) + return 0; + + if (!ICI.isEquality()) { + // If we have an unsigned comparison and an ashr, we can't simplify this. + // Similarly for signed comparisons with lshr. + if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) + return 0; + + // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv + // by a power of 2. Since we already have logic to simplify these, + // transform to div and then simplify the resultant comparison. + if (Shr->getOpcode() == Instruction::AShr && + (!Shr->isExact() || ShAmtVal == TypeBits - 1)) + return 0; + + // Revisit the shift (to delete it). + Worklist.Add(Shr); + + Constant *DivCst = + ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); + + Value *Tmp = + Shr->getOpcode() == Instruction::AShr ? + Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : + Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); + + ICI.setOperand(0, Tmp); + + // If the builder folded the binop, just return it. + BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); + if (TheDiv == 0) + return &ICI; + + // Otherwise, fold this div/compare. 
+ assert(TheDiv->getOpcode() == Instruction::SDiv || + TheDiv->getOpcode() == Instruction::UDiv); + + Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst)); + assert(Res && "This div/cst should have folded!"); + return Res; + } + + + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + APInt Comp = CmpRHSV << ShAmtVal; + ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp); + if (Shr->getOpcode() == Instruction::LShr) + Comp = Comp.lshr(ShAmtVal); + else + Comp = Comp.ashr(ShAmtVal); + + if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + // Otherwise, check to see if the bits shifted out are known to be zero. + // If so, we can compare against the unshifted value: + // (X & 4) >> 1 == 2 --> (X & 4) == 4. + if (Shr->hasOneUse() && Shr->isExact()) + return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); + + if (Shr->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = ConstantInt::get(ICI.getContext(), Val); + + Value *And = Builder->CreateAnd(Shr->getOperand(0), + Mask, Shr->getName()+".mask"); + return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); + } + return 0; +} + /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". 
/// @@ -921,7 +993,7 @@ Instruction *LHSI, ConstantInt *RHS) { const APInt &RHSV = RHS->getValue(); - + switch (LHSI->getOpcode()) { case Instruction::Trunc: if (ICI.isEquality() && LHSI->hasOneUse()) { @@ -932,19 +1004,18 @@ APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); - + // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { // Pull in the high bits from known-ones set. - APInt NewRHS(RHS->getValue()); - NewRHS.zext(SrcBits); + APInt NewRHS = RHS->getValue().zext(SrcBits); NewRHS |= KnownOne; return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantInt::get(ICI.getContext(), NewRHS)); } } break; - + case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { // If this is a comparison that tests the signbit (X < 0) or (x > -1), @@ -952,21 +1023,21 @@ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { Value *CompareVal = LHSI->getOperand(0); - + // If the sign bit of the XorCST is not set, there is no change to // the operation, just stop using the Xor. - if (!XorCST->getValue().isNegative()) { + if (!XorCST->isNegative()) { ICI.setOperand(0, CompareVal); Worklist.Add(LHSI); return &ICI; } - + // Was the old condition true if the operand is positive? bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; - + // If so, the new one isn't. 
isTrueIfPositive ^= true; - + if (isTrueIfPositive) return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS)); @@ -988,7 +1059,7 @@ } // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) - if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { + if (!ICI.isEquality() && XorCST->isMaxValue(true)) { const APInt &NotSignBit = XorCST->getValue(); ICmpInst::Predicate Pred = ICI.isSigned() ? ICI.getUnsignedPredicate() @@ -1005,32 +1076,42 @@ if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && LHSI->getOperand(0)->hasOneUse()) { ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1)); - + // If the LHS is an AND of a truncating cast, we can widen the // and/compare to be the input width without changing the value // produced, eliminating a cast. if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { // We can do this transformation if either the AND constant does not - // have its sign bit set or if it is an equality comparison. + // have its sign bit set or if it is an equality comparison. // Extending a relational comparison when we're checking the sign // bit would not work. 
- if (Cast->hasOneUse() && - (ICI.isEquality() || - (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { - uint32_t BitWidth = - cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth(); - APInt NewCST = AndCST->getValue(); - NewCST.zext(BitWidth); - APInt NewCI = RHSV; - NewCI.zext(BitWidth); - Value *NewAnd = + if (ICI.isEquality() || + (!AndCST->isNegative() && RHSV.isNonNegative())) { + Value *NewAnd = Builder->CreateAnd(Cast->getOperand(0), - ConstantInt::get(ICI.getContext(), NewCST)); + ConstantExpr::getZExt(AndCST, Cast->getSrcTy())); + NewAnd->takeName(LHSI); return new ICmpInst(ICI.getPredicate(), NewAnd, - ConstantInt::get(ICI.getContext(), NewCI)); + ConstantExpr::getZExt(RHS, Cast->getSrcTy())); } } - + + // If the LHS is an AND of a zext, and we have an equality compare, we can + // shrink the and/compare to the smaller type, eliminating the cast. + if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) { + IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); + // Make sure we don't compare the upper bits, SimplifyDemandedBits + // should fold the icmp to true/false in that case. + if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { + Value *NewAnd = + Builder->CreateAnd(Cast->getOperand(0), + ConstantExpr::getTrunc(AndCST, Ty)); + NewAnd->takeName(LHSI); + return new ICmpInst(ICI.getPredicate(), NewAnd, + ConstantExpr::getTrunc(RHS, Ty)); + } + } + // If this is: (X >> C1) & C2 != C3 (where any shift and any compare // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This // happens a LOT in code produced by the C front-end, for bitfield @@ -1038,12 +1119,12 @@ BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); if (Shift && !Shift->isShift()) Shift = 0; - + ConstantInt *ShAmt; ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; - const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - const Type *AndTy = AndCST->getType(); // Type of the and. 
- + Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. + Type *AndTy = AndCST->getType(); // Type of the and. + // We can fold this as long as we can't shift unknown bits // into the mask. This can only happen with signed shift // rights, as they sign-extend. @@ -1054,20 +1135,20 @@ // of the bits shifted in could be tested after the mask. uint32_t TyBits = Ty->getPrimitiveSizeInBits(); int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); - + uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); - if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & + if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & AndCST->getValue()) == 0) CanFold = true; } - + if (CanFold) { Constant *NewCst; if (Shift->getOpcode() == Instruction::Shl) NewCst = ConstantExpr::getLShr(RHS, ShAmt); else NewCst = ConstantExpr::getShl(RHS, ShAmt); - + // Check to see if we are shifting out any of the bits being // compared. if (ConstantExpr::get(Shift->getOpcode(), @@ -1095,7 +1176,7 @@ } } } - + // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is // preferable because it allows the C<<Y expression to be hoisted out // of a loop if Y is invariant and X is not. @@ -1110,16 +1191,16 @@ // Insert a logical shift. NS = Builder->CreateLShr(AndCST, Shift->getOperand(1)); } - + // Compute X & (C << Y). - Value *NewAnd = - Builder->CreateAnd(Shift->getOperand(0), NS); - + Value *NewAnd = + Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); + ICI.setOperand(0, NewAnd); return &ICI; } } - + // Try to optimize things like "A[i]&42 == 0" to index computations. if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) { if (GetElementPtrInst *GEP = @@ -1141,7 +1222,6 @@ if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 // -> and (icmp eq P, null), (icmp eq Q, null). 
- Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, Constant::getNullValue(P->getType())); Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, @@ -1155,19 +1235,19 @@ } break; } - + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); if (!ShAmt) break; - + uint32_t TypeBits = RHSV.getBitWidth(); - + // Check that the shift amount is in range. If not, don't perform // undefined shifts. When the shift is visited it will be // simplified. if (ShAmt->uge(TypeBits)) break; - + if (ICI.isEquality()) { // If we are comparing against bits always shifted out, the // comparison cannot succeed. @@ -1180,96 +1260,66 @@ ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } - + + // If the shift is NUW, then it is just shifting out zeros, no need for an + // AND. + if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap()) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); Constant *Mask = - ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, + ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); - + Value *And = - Builder->CreateAnd(LHSI->getOperand(0),Mask); + Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, - ConstantInt::get(ICI.getContext(), - RHSV.lshr(ShAmtVal))); + ConstantExpr::getLShr(RHS, ShAmt)); } } - + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. 
bool TrueIfSigned = false; if (LHSI->hasOneUse() && isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { // (X << 31) <s 0 --> (X&1) != 0 - Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) << - (TypeBits-ShAmt->getZExtValue()-1)); + Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), + APInt::getOneBitSet(TypeBits, + TypeBits-ShAmt->getZExtValue()-1)); Value *And = - Builder->CreateAnd(LHSI->getOperand(0), Mask); + Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, And, Constant::getNullValue(And->getType())); } break; } - + case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) case Instruction::AShr: { - // Only handle equality comparisons of shift-by-constant. - ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); - if (!ShAmt || !ICI.isEquality()) break; + // Handle equality comparisons of shift-by-constant. + BinaryOperator *BO = cast<BinaryOperator>(LHSI); + if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { + if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) + return Res; + } - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - uint32_t TypeBits = RHSV.getBitWidth(); - if (ShAmt->uge(TypeBits)) - break; - - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - APInt Comp = RHSV << ShAmtVal; - if (LHSI->getOpcode() == Instruction::LShr) - Comp = Comp.lshr(ShAmtVal); - else - Comp = Comp.ashr(ShAmtVal); - - if (Comp != RHSV) { // Comparing against a bit that we know is zero. 
- bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), - IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } - - // Otherwise, check to see if the bits shifted out are known to be zero. - // If so, we can compare against the unshifted value: - // (X & 4) >> 1 == 2 --> (X & 4) == 4. - if (LHSI->hasOneUse() && - MaskedValueIsZero(LHSI->getOperand(0), - APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantExpr::getShl(RHS, ShAmt)); - } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(ICI.getContext(), Val); - - Value *And = Builder->CreateAnd(LHSI->getOperand(0), - Mask); - return new ICmpInst(ICI.getPredicate(), And, - ConstantExpr::getShl(RHS, ShAmt)); + // Handle exact shr's. + if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { + if (RHSV.isMinValue()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); } break; } - + case Instruction::SDiv: case Instruction::UDiv: // Fold: icmp pred ([us]div X, C1), C2 -> range test - // Fold this div into the comparison, producing a range check. - // Determine, based on the divide type, what the range is being - // checked. If there is an overflow on the low or high side, remember + // Fold this div into the comparison, producing a range check. + // Determine, based on the divide type, what the range is being + // checked. If there is an overflow on the low or high side, remember // it, otherwise compute the range [low, hi) bounding the new value. // See: InsertRangeTest above for the kinds of replacements possible. if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) @@ -1308,12 +1358,12 @@ } break; } - + // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. 
if (ICI.isEquality()) { bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - - // If the first operand is (add|sub|and|or|xor|rem) with a constant, and + + // If the first operand is (add|sub|and|or|xor|rem) with a constant, and // the second operand is a constant, simplify a bit. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { switch (BO->getOpcode()) { @@ -1323,7 +1373,8 @@ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); if (V.sgt(1) && V.isPowerOf2()) { Value *NewRem = - Builder->CreateURem(BO->getOperand(0), BO->getOperand(1)); + Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), + BO->getName()); return new ICmpInst(ICI.getPredicate(), NewRem, Constant::getNullValue(BO->getType())); } @@ -1339,12 +1390,12 @@ // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - + if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) + if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { + if (BO->hasOneUse()) { Value *Neg = Builder->CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(ICI.getPredicate(), BOp0, Neg); @@ -1354,18 +1405,27 @@ case Instruction::Xor: // For the xor case, we can xor two constants together, eliminating // the explicit xor. 
- if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), ConstantExpr::getXor(RHS, BOC)); - - // FALLTHROUGH - case Instruction::Sub: - // Replace (([sub|xor] A, B) != 0) with (A != B) - if (RHSV == 0) + } else if (RHSV == 0) { + // Replace ((xor A, B) != 0) with (A != B) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), BO->getOperand(1)); + } break; - + case Instruction::Sub: + // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants. + if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) { + if (BO->hasOneUse()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(1), + ConstantExpr::getSub(BOp0C, RHS)); + } else if (RHSV == 0) { + // Replace ((sub A, B) != 0) with (A != B) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + BO->getOperand(1)); + } + break; case Instruction::Or: // If bits are being or'd in that are not present in the constant we // are comparing against, then the comparison could never succeed! @@ -1373,11 +1433,11 @@ Constant *NotCI = ConstantExpr::getNot(RHS); if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) return ReplaceInstUsesWith(ICI, - ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); } break; - + case Instruction::And: if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { // If bits are being compared against that are and'd out, then the @@ -1386,27 +1446,31 @@ return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); - + // If we have ((X & C) == C), turn it into ((X & C) != 0). if (RHS == BOC && RHSV.isPowerOf2()) return new ICmpInst(isICMP_NE ? 
ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, LHSI, Constant::getNullValue(RHS->getType())); - + + // Don't perform the following transforms if the AND has multiple uses + if (!BO->hasOneUse()) + break; + // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 if (BOC->getValue().isSignBit()) { Value *X = BO->getOperand(0); Constant *Zero = Constant::getNullValue(X->getType()); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(pred, X, Zero); } - + // ((X & ~7) == 0) --> X < 8 if (RHSV == 0 && isHighOnes(BOC)) { Value *X = BO->getOperand(0); Constant *NegX = ConstantExpr::getNeg(BOC); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(pred, X, NegX); } @@ -1454,11 +1518,11 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); Value *LHSCIOp = LHSCI->getOperand(0); - const Type *SrcTy = LHSCIOp->getType(); - const Type *DestTy = LHSCI->getType(); + Type *SrcTy = LHSCIOp->getType(); + Type *DestTy = LHSCI->getType(); Value *RHSCIOp; - // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the + // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && TD->getPointerSizeInBits() == @@ -1476,7 +1540,7 @@ if (RHSOp) return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); } - + // The code below only handles extension cast instructions, so far. // Enforce this. if (LHSCI->getOpcode() != Instruction::ZExt && @@ -1489,9 +1553,9 @@ if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { // Not an extension from the same type? 
RHSCIOp = CI->getOperand(0); - if (RHSCIOp->getType() != LHSCIOp->getType()) + if (RHSCIOp->getType() != LHSCIOp->getType()) return 0; - + // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. if (CI->getOpcode() != LHSCI->getOpcode()) @@ -1536,57 +1600,181 @@ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); } - // The re-extended constant changed so the constant cannot be represented + // The re-extended constant changed so the constant cannot be represented // in the shorter type. Consequently, we cannot emit a simple comparison. + // All the cases that fold to true or false will have already been handled + // by SimplifyICmpInst, so only deal with the tricky case. - // First, handle some easy cases. We know the result cannot be equal at this - // point so handle the ICI.isEquality() cases - if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); + if (isSignedCmp || !isSignedExt) + return 0; // Evaluate the comparison for LT (we invert for GT below). LE and GE cases // should have been folded away previously and not enter in here. - Value *Result; - if (isSignedCmp) { - // We're performing a signed comparison. - if (cast<ConstantInt>(CI)->getValue().isNegative()) - Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false - else - Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true - } else { - // We're performing an unsigned comparison. - if (isSignedExt) { - // We're performing an unsigned comp with a sign extended value. - // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Result = Builder->CreateICmpSGT(LHSCIOp, NegOne); - } else { - // Unsigned extend & unsigned compare -> always true. 
- Result = ConstantInt::getTrue(ICI.getContext()); - } - } + + // We're performing an unsigned comp with a sign extended value. + // This is true if the input is >= 0. [aka >s -1] + Constant *NegOne = Constant::getAllOnesValue(SrcTy); + Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); // Finally, return the value computed. - if (ICI.getPredicate() == ICmpInst::ICMP_ULT || - ICI.getPredicate() == ICmpInst::ICMP_SLT) + if (ICI.getPredicate() == ICmpInst::ICMP_ULT) return ReplaceInstUsesWith(ICI, Result); - assert((ICI.getPredicate()==ICmpInst::ICMP_UGT || - ICI.getPredicate()==ICmpInst::ICMP_SGT) && - "ICmp should be folded!"); - if (Constant *CI = dyn_cast<Constant>(Result)) - return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI)); + assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); return BinaryOperator::CreateNot(Result); } +/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form: +/// I = icmp ugt (add (add A, B), CI2), CI1 +/// If this is of the form: +/// sum = a + b +/// if (sum+128 >u 255) +/// Then replace it with llvm.sadd.with.overflow.i8. +/// +static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, + ConstantInt *CI2, ConstantInt *CI1, + InstCombiner &IC) { + // The transformation we're trying to do here is to transform this into an + // llvm.sadd.with.overflow. To do this, we have to replace the original add + // with a narrower add, and discard the add-with-constant that is part of the + // range check (if we can't eliminate it, this isn't profitable). + // In order to eliminate the add-with-constant, the compare can be its only + // use. + Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); + if (!AddWithCst->hasOneUse()) return 0; + + // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. 
+ if (!CI2->getValue().isPowerOf2()) return 0; + unsigned NewWidth = CI2->getValue().countTrailingZeros(); + if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; + + // The width of the new add formed is 1 more than the bias. + ++NewWidth; + + // Check to see that CI1 is an all-ones value with NewWidth bits. + if (CI1->getBitWidth() == NewWidth || + CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) + return 0; + + // In order to replace the original add with a narrower + // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant + // and truncates that discard the high bits of the add. Verify that this is + // the case. + Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0)); + for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end(); + UI != E; ++UI) { + if (*UI == AddWithCst) continue; + + // Only accept truncates for now. We would really like a nice recursive + // predicate like SimplifyDemandedBits, but which goes downwards the use-def + // chain to see which bits of a value are actually demanded. If the + // original add had another add which was then immediately truncated, we + // could still do the transformation. + TruncInst *TI = dyn_cast<TruncInst>(*UI); + if (TI == 0 || + TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; + } + + // If the pattern matches, truncate the inputs to the narrower type and + // use the sadd_with_overflow intrinsic to efficiently compute both the + // result and the overflow bit. + Module *M = I.getParent()->getParent()->getParent(); + + Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, + NewType); + + InstCombiner::BuilderTy *Builder = IC.Builder; + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. 
+ Builder->SetInsertPoint(OrigAdd); + + Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); + Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); + CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); + Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); + Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); + + // The inner add was the result of the narrow add, zero extended to the + // wider type. Replace it with the result computed by the intrinsic. + IC.ReplaceInstUsesWith(*OrigAdd, ZExt); + + // The original icmp gets replaced with the overflow value. + return ExtractValueInst::Create(Call, 1, "sadd.overflow"); +} + +static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, + InstCombiner &IC) { + // Don't bother doing this transformation for pointers, don't do it for + // vectors. + if (!isa<IntegerType>(OrigAddV->getType())) return 0; + + // If the add is a constant expr, then we don't bother transforming it. + Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); + if (OrigAdd == 0) return 0; + + Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. + InstCombiner::BuilderTy *Builder = IC.Builder; + Builder->SetInsertPoint(OrigAdd); + + Module *M = I.getParent()->getParent()->getParent(); + Type *Ty = LHS->getType(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); + Value *Add = Builder->CreateExtractValue(Call, 0); + + IC.ReplaceInstUsesWith(*OrigAdd, Add); + + // The original icmp gets replaced with the overflow value. + return ExtractValueInst::Create(Call, 1, "uadd.overflow"); +} + +// DemandedBitsLHSMask - When performing a comparison against a constant, +// it is possible that not all the bits in the LHS are demanded. 
This helper +// method computes the mask that IS demanded. +static APInt DemandedBitsLHSMask(ICmpInst &I, + unsigned BitWidth, bool isSignCheck) { + if (isSignCheck) + return APInt::getSignBit(BitWidth); + + ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); + if (!CI) return APInt::getAllOnesValue(BitWidth); + const APInt &RHS = CI->getValue(); + + switch (I.getPredicate()) { + // For a UGT comparison, we don't care about any bits that + // correspond to the trailing ones of the comparand. The value of these + // bits doesn't impact the outcome of the comparison, because any value + // greater than the RHS must differ in a bit higher than these due to carry. + case ICmpInst::ICMP_UGT: { + unsigned trailingOnes = RHS.countTrailingOnes(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); + return ~lowBitsSet; + } + + // Similarly, for a ULT comparison, we don't care about the trailing zeros. + // Any value less than the RHS must differ in a higher bit because of carries. + case ICmpInst::ICMP_ULT: { + unsigned trailingZeros = RHS.countTrailingZeros(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); + return ~lowBitsSet; + } + + default: + return APInt::getAllOnesValue(BitWidth); + } + +} Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. 
@@ -1595,18 +1783,18 @@ std::swap(Op0, Op1); Changed = true; } - + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - - const Type *Ty = Op0->getType(); + + Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations if (Ty->isIntegerTy(1)) { switch (I.getPredicate()) { default: llvm_unreachable("Invalid icmp instruction!"); case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) - Value *Xor = Builder->CreateXor(Op0, Op1); + Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); return BinaryOperator::CreateNot(Xor); } case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B @@ -1616,52 +1804,72 @@ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult // FALL THROUGH case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B - Value *Not = Builder->CreateNot(Op0); + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); return BinaryOperator::CreateAnd(Not, Op1); } case ICmpInst::ICMP_SGT: std::swap(Op0, Op1); // Change icmp sgt -> icmp slt // FALL THROUGH case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B - Value *Not = Builder->CreateNot(Op1); + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); return BinaryOperator::CreateAnd(Not, Op0); } case ICmpInst::ICMP_UGE: std::swap(Op0, Op1); // Change icmp uge -> icmp ule // FALL THROUGH case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B - Value *Not = Builder->CreateNot(Op0); + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); return BinaryOperator::CreateOr(Not, Op1); } case ICmpInst::ICMP_SGE: std::swap(Op0, Op1); // Change icmp sge -> icmp sle // FALL THROUGH case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B - Value *Not = Builder->CreateNot(Op1); + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); return BinaryOperator::CreateOr(Not, Op0); } } } unsigned BitWidth = 0; - if (TD) - BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVectorTy()) + if 
(Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); + else if (TD) // Pointers require TD info to get their size. + BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); bool isSignBit = false; // See if we are doing a comparison with a constant. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { Value *A = 0, *B = 0; - + + // Match the following pattern, which is a common idiom when writing + // overflow-safe integer arithmetic function. The source performs an + // addition in wider type, and explicitly checks for overflow using + // comparisons against INT_MIN and INT_MAX. Simplify this by using the + // sadd_with_overflow intrinsic. + // + // TODO: This could probably be generalized to handle other overflow-safe + // operations if we worked out the formulas to compute the appropriate + // magic constants. + // + // sum = a + b + // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 + { + ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) + if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) + return Res; + } + // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) if (I.isEquality() && CI->isZero() && match(Op0, m_Sub(m_Value(A), m_Value(B)))) { // (icmp cond A B) if cond is equality return new ICmpInst(I.getPredicate(), A, B); } - + // If we have an icmp le or icmp ge instruction, turn it into the // appropriate icmp lt or icmp gt instruction. This allows us to rely on // them being folded in the code below. 
The SimplifyICmpInst code has @@ -1677,15 +1885,15 @@ return new ICmpInst(ICmpInst::ICMP_SLT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()+1)); case ICmpInst::ICMP_UGE: - assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_UGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); case ICmpInst::ICMP_SGE: - assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_SGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); } - + // If this comparison is a normal comparison, it demands all // bits, if it is a sign bit comparison, it only demands the sign bit. bool UnusedBit; @@ -1699,8 +1907,7 @@ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); if (SimplifyDemandedBits(I.getOperandUse(0), - isSignBit ? APInt::getSignBit(BitWidth) - : APInt::getAllOnesValue(BitWidth), + DemandedBitsLHSMask(I, BitWidth, isSignBit), Op0KnownZero, Op0KnownOne, 0)) return &I; if (SimplifyDemandedBits(I.getOperandUse(1), @@ -1730,28 +1937,94 @@ // that code below can assume that Min != Max. if (!isa<Constant>(Op0) && Op0Min == Op0Max) return new ICmpInst(I.getPredicate(), - ConstantInt::get(I.getContext(), Op0Min), Op1); + ConstantInt::get(Op0->getType(), Op0Min), Op1); if (!isa<Constant>(Op1) && Op1Min == Op1Max) return new ICmpInst(I.getPredicate(), Op0, - ConstantInt::get(I.getContext(), Op1Min)); + ConstantInt::get(Op1->getType(), Op1Min)); // Based on the range information we know about the LHS, see if we can - // simplify this comparison. For example, (x&4) < 8 is always true. + // simplify this comparison. For example, (x&4) < 8 is always true. 
switch (I.getPredicate()) { default: llvm_unreachable("Unknown icmp opcode!"); - case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_EQ: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + + // If all bits are known zero except for one, then we know at most one + // bit is set. If the comparison is against zero, then this is a check + // to see if *that* bit is set. + APInt Op0KnownZeroInverted = ~Op0KnownZero; + if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + // If the LHS is an AND with the same constant, look through it. + Value *LHS = 0; + ConstantInt *LHSC = 0; + if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || + LHSC->getValue() != Op0KnownZeroInverted) + LHS = Op0; + + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, + // then turn "((1 << x)&8) == 0" into "x != 3". + Value *X = 0; + if (match(LHS, m_Shl(m_One(), m_Value(X)))) { + unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), CmpVal)); + } + + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, + // then turn "((8 >>u x)&1) == 0" into "x != 3". + const APInt *CI; + if (Op0KnownZeroInverted == 1 && + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); + } + break; - case ICmpInst::ICMP_NE: + } + case ICmpInst::ICMP_NE: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + + // If all bits are known zero except for one, then we know at most one + // bit is set. If the comparison is against zero, then this is a check + // to see if *that* bit is set. 
+ APInt Op0KnownZeroInverted = ~Op0KnownZero; + if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + // If the LHS is an AND with the same constant, look through it. + Value *LHS = 0; + ConstantInt *LHSC = 0; + if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || + LHSC->getValue() != Op0KnownZeroInverted) + LHS = Op0; + + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, + // then turn "((1 << x)&8) != 0" into "x == 3". + Value *X = 0; + if (match(LHS, m_Shl(m_One(), m_Value(X)))) { + unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), CmpVal)); + } + + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, + // then turn "((8 >>u x)&1) != 0" into "x == 3". + const APInt *CI; + if (Op0KnownZeroInverted == 1 && + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); + } + break; + } case ICmpInst::ICMP_ULT: if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { @@ -1767,9 +2040,9 @@ break; case ICmpInst::ICMP_UGT: if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) - return ReplaceInstUsesWith(I, 
ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -1786,9 +2059,9 @@ break; case ICmpInst::ICMP_SLT: if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { @@ -1799,9 +2072,9 @@ break; case ICmpInst::ICMP_SGT: if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -1814,30 +2087,30 @@ case ICmpInst::ICMP_SGE: assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_SLE: 
assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_UGE: assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_ULE: assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; } @@ -1865,9 +2138,9 @@ // See if we are doing a comparison between a constant and an instruction that // can be folded into the comparison. 
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - // Since the RHS is a ConstantInt (CI), if the left hand side is an - // instruction, see if that instruction also has constants so that the - // instruction can be folded into the icmp + // Since the RHS is a ConstantInt (CI), if the left hand side is an + // instruction, see if that instruction also has constants so that the + // instruction can be folded into the icmp if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) return Res; @@ -1889,7 +2162,7 @@ // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) + if (Instruction *NV = FoldOpIntoPhi(I)) return NV; break; case Instruction::Select: { @@ -1911,10 +2184,10 @@ if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { if (!Op1) Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), - RHSC); + RHSC, I.getName()); if (!Op2) Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), - RHSC); + RHSC, I.getName()); return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); } break; @@ -1922,7 +2195,7 @@ case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && - TD->getIntPtrType(RHSC->getContext()) == + TD->getIntPtrType(RHSC->getContext()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); @@ -1955,8 +2228,8 @@ // values. If the ptr->ptr cast can be stripped off both arguments, we do so // now. 
if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { - if (Op0->getType()->isPointerTy() && - (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { + if (Op0->getType()->isPointerTy() && + (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { // We keep moving the cast from the left operand over to the right // operand, where it can often be eliminated completely. Op0 = CI->getOperand(0); @@ -1978,7 +2251,7 @@ return new ICmpInst(I.getPredicate(), Op0, Op1); } } - + if (isa<CastInst>(Op0)) { // Handle the special case of: icmp (cast bool to X), <cst> // This comes up when you have code like @@ -1990,79 +2263,213 @@ if (Instruction *R = visitICmpInstWithCastAndCast(I)) return R; } - - // See if it's the same type of instruction on the left and right. - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() && - Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) { - switch (Op0I->getOpcode()) { - default: break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Xor: - if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b - return new ICmpInst(I.getPredicate(), Op0I->getOperand(0), - Op1I->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - if (CI->getValue().isSignBit()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - - if (CI->getValue().isMaxSignedValue()) { - ICmpInst::Predicate Pred = I.isSigned() - ? 
I.getUnsignedPredicate() - : I.getSignedPredicate(); - Pred = I.getSwappedPredicate(Pred); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - } - break; - case Instruction::Mul: - if (!I.isEquality()) - break; - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get(I.getContext(), - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - - AP.countTrailingZeros())); - Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); - return new ICmpInst(I.getPredicate(), And1, And2); - } + // Special logic for binary operators. + BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0); + BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1); + if (BO0 || BO1) { + CmpInst::Predicate Pred = I.getPredicate(); + bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; + if (BO0 && isa<OverflowingBinaryOperator>(BO0)) + NoOp0WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); + if (BO1 && isa<OverflowingBinaryOperator>(BO1)) + NoOp1WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); + + // Analyze the case when either Op0 or Op1 is an add instruction. + // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). + Value *A = 0, *B = 0, *C = 0, *D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Add) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Add) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. 
+ if ((A == Op1 || B == Op1) && NoOp0WrapProblem) + return new ICmpInst(Pred, A == Op1 ? B : A, + Constant::getNullValue(Op1->getType())); + + // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + if ((C == Op0 || D == Op0) && NoOp1WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), + C == Op0 ? D : C); + + // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. + if (A && C && (A == C || A == D || B == C || B == D) && + NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) { + // Determine Y and Z in the form icmp (X+Y), (X+Z). + Value *Y = (A == C || A == D) ? B : A; + Value *Z = (C == A || C == B) ? D : C; + return new ICmpInst(Pred, Y, Z); + } + + // Analyze the case when either Op0 or Op1 is a sub instruction. + // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). + A = 0; B = 0; C = 0; D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Sub) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Sub) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. + if (A == Op1 && NoOp0WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); + + // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. + if (C == Op0 && NoOp1WrapProblem) + return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); + + // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow. + if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, A, C); + + // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. + if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. 
+ BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, D, B); + + BinaryOperator *SRem = NULL; + // icmp (srem X, Y), Y + if (BO0 && BO0->getOpcode() == Instruction::SRem && + Op1 == BO0->getOperand(1)) + SRem = BO0; + // icmp Y, (srem X, Y) + else if (BO1 && BO1->getOpcode() == Instruction::SRem && + Op0 == BO1->getOperand(1)) + SRem = BO1; + if (SRem) { + // We don't check hasOneUse to avoid increasing register pressure because + // the value we use is the same value this instruction was already using. + switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) { + default: break; + case ICmpInst::ICMP_EQ: + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + case ICmpInst::ICMP_NE: + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), + Constant::getAllOnesValue(SRem->getType())); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), + Constant::getNullValue(SRem->getType())); + } + } + + if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && + BO0->hasOneUse() && BO1->hasOneUse() && + BO0->getOperand(1) == BO1->getOperand(1)) { + switch (BO0->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { + if (CI->getValue().isSignBit()) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); } - break; + + if (CI->isMaxValue(true)) { + ICmpInst::Predicate Pred = I.isSigned() + ? 
I.getUnsignedPredicate() + : I.getSignedPredicate(); + Pred = I.getSwappedPredicate(Pred); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); + } } + break; + case Instruction::Mul: + if (!I.isEquality()) + break; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { + // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask + // Mask = -1 >> count-trailing-zeros(Cst). + if (!CI->isZero() && !CI->isOne()) { + const APInt &AP = CI->getValue(); + ConstantInt *Mask = ConstantInt::get(I.getContext(), + APInt::getLowBitsSet(AP.getBitWidth(), + AP.getBitWidth() - + AP.countTrailingZeros())); + Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); + Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); + return new ICmpInst(I.getPredicate(), And1, And2); + } + } + break; + case Instruction::UDiv: + case Instruction::LShr: + if (I.isSigned()) + break; + // fall-through + case Instruction::SDiv: + case Instruction::AShr: + if (!BO0->isExact() || !BO1->isExact()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + case Instruction::Shl: { + bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); + bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); + if (!NUW && !NSW) + break; + if (!NSW && I.isSigned()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + } } } } - - // ~x < ~y --> y < x + { Value *A, *B; - if (match(Op0, m_Not(m_Value(A))) && - match(Op1, m_Not(m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, A); + // ~x < ~y --> y < x + // ~x < cst --> ~cst < x + if (match(Op0, m_Not(m_Value(A)))) { + if (match(Op1, m_Not(m_Value(B)))) + return new ICmpInst(I.getPredicate(), B, A); + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) + return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); + } + + // (a+b) <u a --> llvm.uadd.with.overflow. + // (a+b) <u b --> llvm.uadd.with.overflow. 
+ if (I.getPredicate() == ICmpInst::ICMP_ULT && + match(Op0, m_Add(m_Value(A), m_Value(B))) && + (Op1 == A || Op1 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) + return R; + + // a >u (a+b) --> llvm.uadd.with.overflow. + // b >u (a+b) --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op1, m_Add(m_Value(A), m_Value(B))) && + (Op0 == A || Op0 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) + return R; } - + if (I.isEquality()) { Value *A, *B, *C, *D; - - // -x == -y --> x == y - if (match(Op0, m_Neg(m_Value(A))) && - match(Op1, m_Neg(m_Value(B)))) - return new ICmpInst(I.getPredicate(), A, B); - + if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 Value *OtherVal = A == Op1 ? B : A; @@ -2080,7 +2487,7 @@ Value *Xor = Builder->CreateXor(C, NC); return new ICmpInst(I.getPredicate(), A, Xor); } - + // A^B == A^D -> B == D if (A == C) return new ICmpInst(I.getPredicate(), B, D); if (A == D) return new ICmpInst(I.getPredicate(), B, C); @@ -2088,7 +2495,7 @@ if (B == D) return new ICmpInst(I.getPredicate(), A, C); } } - + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) { // A == (A^B) -> B == 0 @@ -2097,22 +2504,11 @@ Constant::getNullValue(A->getType())); } - // (A-B) == A -> B == 0 - if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - - // A == (A-B) -> B == 0 - if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (Op0->hasOneUse() && Op1->hasOneUse() && - match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { + if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { Value *X = 0, *Y = 0, *Z = 0; - + if (A == C) { X = B; Y = D; 
Z = A; } else if (A == D) { @@ -2122,7 +2518,7 @@ } else if (B == D) { X = A; Y = C; Z = B; } - + if (X) { // Build (X^Y) & Z Op1 = Builder->CreateXor(X, Y); Op1 = Builder->CreateAnd(Op1, Z); @@ -2131,8 +2527,34 @@ return &I; } } + + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to + // "icmp (and X, mask), cst" + uint64_t ShAmt = 0; + ConstantInt *Cst1; + if (Op0->hasOneUse() && + match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), + m_ConstantInt(ShAmt))))) && + match(Op1, m_ConstantInt(Cst1)) && + // Only do this when A has multiple uses. This is most important to do + // when it exposes other optimizations. + !A->hasOneUse()) { + unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); + + if (ShAmt < ASize) { + APInt MaskV = + APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); + MaskV <<= ShAmt; + + APInt CmpV = Cst1->getValue().zext(ASize); + CmpV <<= ShAmt; + + Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); + return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); + } + } } - + { Value *X; ConstantInt *Cst; // icmp X+Cst, X @@ -2158,31 +2580,31 @@ Constant *RHSC) { if (!isa<ConstantFP>(RHSC)) return 0; const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); - + // Get the width of the mantissa. We don't want to hack on conversions that // might lose information from the integer, e.g. "i64 -> float" int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); if (MantissaWidth == -1) return 0; // Unknown. - + // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); - + // If this is a uitofp instruction, we need an extra bit to hold the sign. 
bool LHSUnsigned = isa<UIToFPInst>(LHSI); if (LHSUnsigned) ++InputSize; - + // If the conversion would lose info, don't hack on this. if ((int)InputSize > MantissaWidth) return 0; - + // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is // not a NAN (it would have been previously simplified). assert(!RHS.isNaN() && "NaN comparison not already folded!"); - + ICmpInst::Predicate Pred; switch (I.getPredicate()) { default: llvm_unreachable("Unexpected predicate!"); @@ -2215,15 +2637,15 @@ case FCmpInst::FCMP_UNO: return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } - - const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); - + + IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); + // Now we know that the APFloat is a normal number, zero or inf. - + // See if the FP constant is too large for the integer. For example, // comparing an i8 to 300.0. unsigned IntWidth = IntTy->getScalarSizeInBits(); - + if (!LHSUnsigned) { // If the RHS value is > SignedMax, fold the comparison. This handles +INF // and large values. @@ -2249,7 +2671,7 @@ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } } - + if (!LHSUnsigned) { // See if the RHS value is < SignedMin. APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); @@ -2345,7 +2767,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { bool Changed = false; - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. 
@@ -2355,7 +2777,7 @@ } Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); @@ -2371,7 +2793,7 @@ I.setPredicate(FCmpInst::FCMP_UNO); I.setOperand(1, Constant::getNullValue(Op0->getType())); return &I; - + case FCmpInst::FCMP_ORD: // True if ordered (no nans) case FCmpInst::FCMP_OEQ: // True if ordered and equal case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal @@ -2382,17 +2804,57 @@ return &I; } } - + // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast<Constant>(Op1)) { if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) switch (LHSI->getOpcode()) { + case Instruction::FPExt: { + // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless + FPExtInst *LHSExt = cast<FPExtInst>(LHSI); + ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC); + if (!RHSF) + break; + + // We can't convert a PPC double double. + if (RHSF->getType()->isPPC_FP128Ty()) + break; + + const fltSemantics *Sem; + // FIXME: This shouldn't be here. + if (LHSExt->getSrcTy()->isFloatTy()) + Sem = &APFloat::IEEEsingle; + else if (LHSExt->getSrcTy()->isDoubleTy()) + Sem = &APFloat::IEEEdouble; + else if (LHSExt->getSrcTy()->isFP128Ty()) + Sem = &APFloat::IEEEquad; + else if (LHSExt->getSrcTy()->isX86_FP80Ty()) + Sem = &APFloat::x87DoubleExtended; + else + break; + + bool Lossy; + APFloat F = RHSF->getValueAPF(); + F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); + + // Avoid lossy conversions and denormals. Zero is a special case + // that's OK to convert. + APFloat Fabs = F; + Fabs.clearSign(); + if (!Lossy && + ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) != + APFloat::cmpLessThan) || Fabs.isZero())) + + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + ConstantFP::get(RHSC->getContext(), F)); + break; + } case Instruction::PHI: // Only fold fcmp into the PHI if the phi and fcmp are in the same // block. 
If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) + if (Instruction *NV = FoldOpIntoPhi(I)) return NV; break; case Instruction::SIToFP: @@ -2411,13 +2873,13 @@ Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); // Insert a new FCmp of the other select operand. Op2 = Builder->CreateFCmp(I.getPredicate(), - LHSI->getOperand(2), RHSC); + LHSI->getOperand(2), RHSC, I.getName()); } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { // Fold the known value into the constant operand. Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); // Insert a new FCmp of the other select operand. Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), - RHSC); + RHSC, I.getName()); } } @@ -2425,6 +2887,14 @@ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } + case Instruction::FSub: { + // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C + Value *Op; + if (match(LHSI, m_FNeg(m_Value(Op)))) + return new FCmpInst(I.getSwappedPredicate(), Op, + ConstantExpr::getFNeg(RHSC)); + break; + } case Instruction::Load: if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { @@ -2438,5 +2908,17 @@ } } + // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y + Value *X, *Y; + if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) + return new FCmpInst(I.getSwappedPredicate(), X, Y); + + // fcmp (fpext x), (fpext y) -> fcmp x, y + if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0)) + if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1)) + if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + RHSExt->getOperand(0)); + return Changed ? &I : 0; }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index ab355b1..7446a51 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -26,7 +26,7 @@ // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. if (TD) { - const Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); + Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); if (AI.getArraySize()->getType() != IntPtrTy) { Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); @@ -38,17 +38,17 @@ // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - const Type *NewTy = + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); - AllocaInst *New = Builder->CreateAlloca(NewTy, 0); + AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of // allocas if possible...also skip interleaved debug info // BasicBlock::iterator It = New; - while (isa<AllocaInst>(*It) || ISA_DEBUG_INFO_INTRINSIC(*It)) ++It; + while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... @@ -57,11 +57,13 @@ Value *Idx[2]; Idx[0] = NullIdx; Idx[1] = NullIdx; - Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, It); + Instruction *GEP = + GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub"); + InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original // allocation. 
- return ReplaceInstUsesWith(AI, V); + return ReplaceInstUsesWith(AI, GEP); } else if (isa<UndefValue>(AI.getArraySize())) { return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); } @@ -89,28 +91,28 @@ User *CI = cast<User>(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); - const PointerType *DestTy = cast<PointerType>(CI->getType()); - const Type *DestPTy = DestTy->getElementType(); - if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { + PointerType *DestTy = cast<PointerType>(CI->getType()); + Type *DestPTy = DestTy->getElementType(); + if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { // If the address spaces don't match, don't eliminate the cast. if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) return 0; - const Type *SrcPTy = SrcTy->getElementType(); + Type *SrcPTy = SrcTy->getElementType(); if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. - if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) + if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); Idxs[1] = Idxs[0]; - CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); + CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); } @@ -128,8 +130,9 @@ // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. 
LoadInst *NewLoad = - IC.Builder->CreateLoad(CastOp, LI.isVolatile()); + IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); + NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); // Now cast the result of the load. return new BitCastInst(NewLoad, LI.getType()); } @@ -144,7 +147,7 @@ // Attempt to improve the alignment. if (TD) { unsigned KnownAlign = - GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); + getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD); unsigned LoadAlign = LI.getAlignment(); unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : TD->getABITypeAlignment(LI.getType()); @@ -160,11 +163,12 @@ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; - // None of the following transforms are legal for volatile loads. - if (LI.isVolatile()) return 0; + // None of the following transforms are legal for volatile/atomic loads. + // FIXME: Some of it is okay for atomic loads; needs refactoring. + if (!LI.isSimple()) return 0; // Do really simple store-to-load forwarding and load CSE, to catch cases - // where there are several consequtive memory accesses to the same location, + // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. 
BasicBlock::iterator BBI = &LI; if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) @@ -219,8 +223,10 @@ unsigned Align = LI.getAlignment(); if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) && isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) { - LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1)); - LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2)); + LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); V1->setAlignment(Align); V2->setAlignment(Align); return SelectInst::Create(SI->getCondition(), V1, V2); @@ -251,11 +257,11 @@ User *CI = cast<User>(SI.getOperand(1)); Value *CastOp = CI->getOperand(0); - const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); - const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); + Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); + PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); if (SrcTy == 0) return 0; - const Type *SrcPTy = SrcTy->getElementType(); + Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; @@ -275,12 +281,12 @@ NewGEPIndices.push_back(Zero); while (1) { - if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) { + if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ break; NewGEPIndices.push_back(Zero); SrcPTy = STy->getElementType(0); - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { NewGEPIndices.push_back(Zero); SrcPTy = ATy->getElementType(); } else { @@ -309,8 +315,8 @@ Value *NewCast; Value *SIOp0 = SI.getOperand(0); Instruction::CastOps opcode = Instruction::BitCast; - const Type* CastSrcTy = SIOp0->getType(); - const Type* CastDstTy = SrcPTy; + Type* 
CastSrcTy = SIOp0->getType(); + Type* CastDstTy = SrcPTy; if (CastDstTy->isPointerTy()) { if (CastSrcTy->isIntegerTy()) opcode = Instruction::IntToPtr; @@ -322,11 +328,13 @@ // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) - CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), - NewGEPIndices.end()); + CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy); - return new StoreInst(NewCast, CastOp); + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, + SIOp0->getName()+".c"); + SI.setOperand(0, NewCast); + SI.setOperand(1, CastOp); + return &SI; } /// equivalentAddressValues - Test if A and B will obviously have the same @@ -362,29 +370,11 @@ Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); - // If the RHS is an alloca with a single use, zapify the store, making the - // alloca dead. - // If the RHS is an alloca with a two uses, the other one being a - // llvm.dbg.declare, zapify the store and the declare, making the - // alloca dead. We must do this to prevent declares from affecting - // codegen. - if (!SI.isVolatile()) { - if (Ptr->hasOneUse()) { - if (isa<AllocaInst>(Ptr)) - return EraseInstFromFunction(SI); - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { - if (isa<AllocaInst>(GEP->getOperand(0))) { - if (GEP->getOperand(0)->hasOneUse()) - return EraseInstFromFunction(SI); - } - } - } - } - // Attempt to improve the alignment. if (TD) { unsigned KnownAlign = - GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); + getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()), + TD); unsigned StoreAlign = SI.getAlignment(); unsigned EffectiveStoreAlign = StoreAlign != 0 ? 
StoreAlign : TD->getABITypeAlignment(Val->getType()); @@ -395,6 +385,23 @@ SI.setAlignment(EffectiveStoreAlign); } + // Don't hack volatile/atomic stores. + // FIXME: Some bits are legal for atomic stores; needs refactoring. + if (!SI.isSimple()) return 0; + + // If the RHS is an alloca with a single use, zapify the store, making the + // alloca dead. + if (Ptr->hasOneUse()) { + if (isa<AllocaInst>(Ptr)) + return EraseInstFromFunction(SI); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { + if (isa<AllocaInst>(GEP->getOperand(0))) { + if (GEP->getOperand(0)->hasOneUse()) + return EraseInstFromFunction(SI); + } + } + } + // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. @@ -404,7 +411,7 @@ --BBI; // Don't count debug info directives, lest they affect codegen, // and we skip pointer-to-pointer bitcasts, which are NOPs. - if (ISA_DEBUG_INFO_INTRINSIC(BBI) || + if (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; @@ -412,8 +419,8 @@ if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? - if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), - SI.getOperand(1))) { + if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), + SI.getOperand(1))) { ++NumDeadStore; ++BBI; EraseInstFromFunction(*PrevSI); @@ -427,7 +434,7 @@ // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - !SI.isVolatile()) + LI->isSimple()) return EraseInstFromFunction(SI); // Otherwise, this is a load from some other location. 
Stores before it @@ -439,9 +446,6 @@ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; } - - - if (SI.isVolatile()) return 0; // Don't hack volatile stores. // store X, null -> turns into 'unreachable' in SimplifyCFG if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { @@ -474,7 +478,7 @@ BBI = &SI; do { ++BBI; - } while (ISA_DEBUG_INFO_INTRINSIC(BBI) || + } while (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())); if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) if (BI->isUnconditional()) @@ -538,17 +542,17 @@ if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. - while (ISA_DEBUG_INFO_INTRINSIC(BBI) || + while (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { if (BBI==OtherBB->begin()) return false; --BBI; } - // If this isn't a store, isn't a store to the same location, or if the - // alignments differ, bail out. + // If this isn't a store, isn't a store to the same location, or is not the + // right kind of store, bail out. OtherStore = dyn_cast<StoreInst>(BBI); if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) + !SI.isSameOperationAs(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the @@ -564,7 +568,7 @@ // Check to see if we find the matching store. if ((OtherStore = dyn_cast<StoreInst>(BBI))) { if (OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) + !SI.isSameOperationAs(OtherStore)) return false; break; } @@ -588,8 +592,7 @@ // Insert a PHI node now if we need it. 
Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { - PHINode *PN = PHINode::Create(MergedVal->getType()); - PN->reserveOperandSpace(2); + PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); @@ -597,11 +600,15 @@ // Advance to a place where it is safe to insert the new store and // insert it. - BBI = DestBB->getFirstNonPHI(); - InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1), - OtherStore->isVolatile(), - SI.getAlignment()), *BBI); - + BBI = DestBB->getFirstInsertionPt(); + StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), + SI.isVolatile(), + SI.getAlignment(), + SI.getOrdering(), + SI.getSynchScope()); + InsertNewInstBefore(NewSI, *BBI); + NewSI->setDebugLoc(OtherStore->getDebugLoc()); + // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore);
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index c6111f0..7f48125 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -14,26 +14,76 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; -/// SubOne - Subtract one from a ConstantInt. -static Constant *SubOne(ConstantInt *C) { - return ConstantInt::get(C->getContext(), C->getValue()-1); + +/// simplifyValueKnownNonZero - The specific integer value is used in a context +/// where it is known to be non-zero. If this allows us to simplify the +/// computation, do so and return the new operand, otherwise return null. +static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { + // If V has multiple uses, then we would have to do more analysis to determine + // if this is safe. For example, the use could be in dynamically unreached + // code. + if (!V->hasOneUse()) return 0; + + bool MadeChange = false; + + // ((1 << A) >>u B) --> (1 << (A-B)) + // Because V cannot be zero, we know that B is less than A. + Value *A = 0, *B = 0, *PowerOf2 = 0; + if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))), + m_Value(B))) && + // The "1" can be any value known to be a power of 2. + isPowerOfTwo(PowerOf2, IC.getTargetData())) { + A = IC.Builder->CreateSub(A, B); + return IC.Builder->CreateShl(PowerOf2, A); + } + + // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it + // inexact. Similarly for <<. + if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) + if (I->isLogicalShift() && + isPowerOfTwo(I->getOperand(0), IC.getTargetData())) { + // We know that this is an exact/nuw shift and that the input is a + // non-zero context as well. 
+ if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) { + I->setOperand(0, V2); + MadeChange = true; + } + + if (I->getOpcode() == Instruction::LShr && !I->isExact()) { + I->setIsExact(); + MadeChange = true; + } + + if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) { + I->setHasNoUnsignedWrap(); + MadeChange = true; + } + } + + // TODO: Lots more we could do here: + // If V is a phi node, we can call this on each of its operands. + // "select cond, X, 0" can simplify to "X". + + return MadeChange ? V : 0; } + /// MultiplyOverflows - True if the multiply can not be expressed in an int /// this size. static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { uint32_t W = C1->getBitWidth(); APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); if (sign) { - LHSExt.sext(W * 2); - RHSExt.sext(W * 2); + LHSExt = LHSExt.sext(W * 2); + RHSExt = RHSExt.sext(W * 2); } else { - LHSExt.zext(W * 2); - RHSExt.zext(W * 2); + LHSExt = LHSExt.zext(W * 2); + RHSExt = RHSExt.zext(W * 2); } APInt MulExt = LHSExt * RHSExt; @@ -47,62 +97,71 @@ } Instruction *InstCombiner::visitMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) // undef * X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + if (Value *V = SimplifyMulInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); - // Simplify mul instructions with a constant RHS. 
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) { + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return ReplaceInstUsesWith(I, V); - // ((X << C1)*C2) == (X * (C2 << C1)) - if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) - if (SI->getOpcode() == Instruction::Shl) - if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) - return BinaryOperator::CreateMul(SI->getOperand(0), - ConstantExpr::getShl(CI, ShOp)); - - if (CI->isZero()) - return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 - if (CI->equalsInt(1)) // X * 1 == X - return ReplaceInstUsesWith(I, Op0); - if (CI->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0); - - const APInt& Val = cast<ConstantInt>(CI)->getValue(); - if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C - return BinaryOperator::CreateShl(Op0, - ConstantInt::get(Op0->getType(), Val.logBase2())); + if (match(Op1, m_AllOnes())) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + + // ((X << C1)*C2) == (X * (C2 << C1)) + if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) + if (SI->getOpcode() == Instruction::Shl) + if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) + return BinaryOperator::CreateMul(SI->getOperand(0), + ConstantExpr::getShl(CI, ShOp)); + + const APInt &Val = CI->getValue(); + if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C + Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2()); + BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst); + if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap(); + if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap(); + return Shl; + } + + // Canonicalize (X+C1)*CI -> X*CI+C1*CI. 
+ { Value *X; ConstantInt *C1; + if (Op0->hasOneUse() && + match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) { + Value *Add = Builder->CreateMul(X, CI); + return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI)); } - } else if (Op1C->getType()->isVectorTy()) { - if (Op1C->isNullValue()) - return ReplaceInstUsesWith(I, Op1C); + } - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { - if (Op1V->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0); - - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat)) - if (CI->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); + // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n + // (Y + const) * (-(2**n)) -> (-constY) * (2**n), for positive nonzero n + // The "* (2**n)" thus becomes a potential shifting opportunity. + { + const APInt & Val = CI->getValue(); + const APInt &PosVal = Val.abs(); + if (Val.isNegative() && PosVal.isPowerOf2()) { + Value *X = 0, *Y = 0; + if (Op0->hasOneUse()) { + ConstantInt *C1; + Value *Sub = 0; + if (match(Op0, m_Sub(m_Value(Y), m_Value(X)))) + Sub = Builder->CreateSub(X, Y, "suba"); + else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1)))) + Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc"); + if (Sub) + return + BinaryOperator::CreateMul(Sub, + ConstantInt::get(Y->getType(), PosVal)); } } } - - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) - if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && - isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) { - // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. - Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C); - Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); - return BinaryOperator::CreateAdd(Add, C1C2); - - } - + } + + // Simplify mul instructions with a constant RHS. 
+ if (isa<Constant>(Op1)) { // Try to fold constant mul into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -135,8 +194,8 @@ BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); - // If the division is exact, X % Y is zero. - if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO)) + // If the division is exact, X % Y is zero, so we end up with X or -X. + if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); @@ -194,7 +253,7 @@ } Instruction *InstCombiner::visitFMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); + bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // Simplify mul instructions with a constant RHS... @@ -304,28 +363,6 @@ } -/// This function implements the transforms on div instructions that work -/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is -/// used by the visitors to those instructions. -/// @brief Transforms common to all three div instructions -Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // undef / X -> 0 for integer. - // undef / X -> undef for FP (the undef could be a snan). - if (isa<UndefValue>(Op0)) { - if (Op0->getType()->isFPOrFPVectorTy()) - return ReplaceInstUsesWith(I, Op0); - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - - // X / undef -> undef - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); - - return 0; -} - /// This function implements the transforms common to both integer division /// instructions (udiv and sdiv). It is called by the visitors to those integer /// division instructions. 
@@ -333,31 +370,18 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // (sdiv X, X) --> 1 (udiv X, X) --> 1 - if (Op0 == Op1) { - if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { - Constant *CI = ConstantInt::get(Ty->getElementType(), 1); - std::vector<Constant*> Elts(Ty->getNumElements(), CI); - return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); - } - - Constant *CI = ConstantInt::get(I.getType(), 1); - return ReplaceInstUsesWith(I, CI); + // The RHS is known non-zero. + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) { + I.setOperand(1, V); + return &I; } - if (Instruction *Common = commonDivTransforms(I)) - return Common; - // Handle cases involving: [su]div X, (select Cond, Y, Z) // This does not apply for fdiv. if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) return &I; if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // div X, 1 == X - if (RHS->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); - // (X / C1) / C2 -> X / (C1*C2) if (Instruction *LHS = dyn_cast<Instruction>(Op0)) if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) @@ -365,9 +389,8 @@ if (MultiplyOverflows(RHS, LHSRHS, I.getOpcode()==Instruction::SDiv)) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - else - return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), - ConstantExpr::getMul(RHS, LHSRHS)); + return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), + ConstantExpr::getMul(RHS, LHSRHS)); } if (!RHS->isZero()) { // avoid X udiv 0 @@ -380,28 +403,41 @@ } } - // 0 / X == 0, we don't need to preserve faults! - if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0)) - if (LHS->equalsInt(0)) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // See if we can fold away this div instruction. 
+ if (SimplifyDemandedInstructionBits(I)) + return &I; - // It can't be division by zero, hence it must be division by one. - if (I.getType()->isIntegerTy(1)) - return ReplaceInstUsesWith(I, Op0); - - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { - if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue())) - // div X, 1 == X - if (X->isOne()) - return ReplaceInstUsesWith(I, Op0); + // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y + Value *X = 0, *Z = 0; + if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 + bool isSigned = I.getOpcode() == Instruction::SDiv; + if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) || + (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1))))) + return BinaryOperator::Create(I.getOpcode(), X, Op1); } return 0; } +/// dyn_castZExtVal - Checks if V is a zext or constant that can +/// be truncated to Ty without losing bits. +static Value *dyn_castZExtVal(Value *V, Type *Ty) { + if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) { + if (Z->getSrcTy() == Ty) + return Z->getOperand(0); + } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) { + if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth()) + return ConstantExpr::getTrunc(C, Ty); + } + return 0; +} + Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyUDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; @@ -410,60 +446,66 @@ // X udiv 2^C -> X >> C // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. 
- if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 - return BinaryOperator::CreateLShr(Op0, + if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2 + BinaryOperator *LShr = + BinaryOperator::CreateLShr(Op0, ConstantInt::get(Op0->getType(), C->getValue().logBase2())); + if (I.isExact()) LShr->setIsExact(); + return LShr; + } // X udiv C, where C >= signbit if (C->getValue().isNegative()) { - Value *IC = Builder->CreateICmpULT( Op0, C); + Value *IC = Builder->CreateICmpULT(Op0, C); return SelectInst::Create(IC, Constant::getNullValue(I.getType()), ConstantInt::get(I.getType(), 1)); } } // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) - if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) { - if (RHSI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(RHSI->getOperand(0))) { - const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue(); - if (C1.isPowerOf2()) { - Value *N = RHSI->getOperand(1); - const Type *NTy = N->getType(); - if (uint32_t C2 = C1.logBase2()) - N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2)); - return BinaryOperator::CreateLShr(Op0, N); - } + { const APInt *CI; Value *N; + if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) { + if (*CI != 1) + N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2())); + if (I.isExact()) + return BinaryOperator::CreateExactLShr(Op0, N); + return BinaryOperator::CreateLShr(Op0, N); } } // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) // where C1&C2 are powers of two. 
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) - if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { - const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); - if (TVA.isPowerOf2() && FVA.isPowerOf2()) { - // Compute the shift amounts - uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); - // Construct the "on true" case of the select - Constant *TC = ConstantInt::get(Op0->getType(), TSA); - Value *TSI = Builder->CreateLShr(Op0, TC); + { Value *Cond; const APInt *C1, *C2; + if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) { + // Construct the "on true" case of the select + Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t", + I.isExact()); - // Construct the "on false" case of the select - Constant *FC = ConstantInt::get(Op0->getType(), FSA); - Value *FSI = Builder->CreateLShr(Op0, FC); + // Construct the "on false" case of the select + Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f", + I.isExact()); + + // construct the select instruction and return it. + return SelectInst::Create(Cond, TSI, FSI); + } + } - // construct the select instruction and return it. 
- return SelectInst::Create(SI->getOperand(0), TSI, FSI); - } - } + // (zext A) udiv (zext B) --> zext (A udiv B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", + I.isExact()), + I.getType()); + return 0; } Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifySDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; @@ -473,20 +515,17 @@ if (RHS->isAllOnesValue()) return BinaryOperator::CreateNeg(Op0); - // sdiv X, C --> ashr X, log2(C) - if (cast<SDivOperator>(&I)->isExact() && - RHS->getValue().isNonNegative() && + // sdiv X, C --> ashr exact X, log2(C) + if (I.isExact() && RHS->getValue().isNonNegative() && RHS->getValue().isPowerOf2()) { Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), RHS->getValue().exactLogBase2()); - return BinaryOperator::CreateAShr(Op0, ShAmt); + return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName()); } // -X/C --> X/-C provided the negation doesn't overflow. 
if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) - if (isa<Constant>(Sub->getOperand(0)) && - cast<Constant>(Sub->getOperand(0))->isNullValue() && - Sub->hasNoSignedWrap()) + if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap()) return BinaryOperator::CreateSDiv(Sub->getOperand(1), ConstantExpr::getNeg(RHS)); } @@ -498,16 +537,15 @@ if (MaskedValueIsZero(Op0, Mask)) { if (MaskedValueIsZero(Op1, Mask)) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateUDiv(Op0, Op1); + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); } - ConstantInt *ShiftedInt; - if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && - ShiftedInt->getValue().isPowerOf2()) { + + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have // the sign bit set. - return BinaryOperator::CreateUDiv(Op0, Op1); + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); } } } @@ -516,27 +554,22 @@ } Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { - return commonDivTransforms(I); -} - -/// This function implements the transforms on rem instructions that work -/// regardless of the kind of rem instruction it is (urem, srem, or frem). It -/// is used by the visitors to those instructions. 
-/// @brief Transforms common to all three rem instructions -Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVectorTy()) - return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); // X % undef -> undef + if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); - // Handle cases involving: rem X, (select Cond, Y, Z) - if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) - return &I; + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + const APFloat &Op1F = Op1C->getValueAPF(); + + // If the divisor has an exact multiplicative inverse we can turn the fdiv + // into a cheaper fmul. + APFloat Reciprocal(Op1F.getSemantics()); + if (Op1F.getExactInverse(&Reciprocal)) { + ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal); + return BinaryOperator::CreateFMul(Op0, RFP); + } + } return 0; } @@ -548,22 +581,17 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Instruction *common = commonRemTransforms(I)) - return common; + // The RHS is known non-zero. + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) { + I.setOperand(1, V); + return &I; + } - // 0 % X == 0 for integer, we don't need to preserve faults! - if (Constant *LHS = dyn_cast<Constant>(Op0)) - if (LHS->isNullValue()) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; - if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // X % 0 == undef, we don't need to preserve faults! 
- if (RHS->equalsInt(0)) - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - - if (RHS->equalsInt(1)) // X % 1 == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - + if (isa<ConstantInt>(Op1)) { if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) { if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -585,51 +613,52 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyURemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *common = commonIRemTransforms(I)) return common; - if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // X urem C^2 -> X and C - // Check to see if this is an unsigned remainder with an exact power of 2, - // if so, convert to a bitwise and. - if (ConstantInt *C = dyn_cast<ConstantInt>(RHS)) - if (C->getValue().isPowerOf2()) - return BinaryOperator::CreateAnd(Op0, SubOne(C)); + // X urem C^2 -> X and C-1 + { const APInt *C; + if (match(Op1, m_Power2(C))) + return BinaryOperator::CreateAnd(Op0, + ConstantInt::get(I.getType(), *C-1)); } - if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) { - // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) - if (RHSI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(RHSI->getOperand(0))) { - if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) { - Constant *N1 = Constant::getAllOnesValue(I.getType()); - Value *Add = Builder->CreateAdd(RHSI, N1); - return BinaryOperator::CreateAnd(Op0, Add); - } + // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { + Constant *N1 = Constant::getAllOnesValue(I.getType()); + Value *Add = Builder->CreateAdd(Op1, N1); + return BinaryOperator::CreateAnd(Op0, Add); + } + + // urem X, (select Cond, 2^C1, 2^C2) --> + // select Cond, (and X, C1-1), (and X, C2-1) + // when C1&C2 are powers of 
two. + { Value *Cond; const APInt *C1, *C2; + if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) { + Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t"); + Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f"); + return SelectInst::Create(Cond, TrueAnd, FalseAnd); } } - // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2) - // where C1&C2 are powers of two. - if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) { - if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { - // STO == 0 and SFO == 0 handled above. - if ((STO->getValue().isPowerOf2()) && - (SFO->getValue().isPowerOf2())) { - Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO)); - Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO)); - return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); - } - } - } - + // (zext A) urem (zext B) --> zext (A urem B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1), + I.getType()); + return 0; } Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifySRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer rem common cases if (Instruction *Common = commonIRemTransforms(I)) return Common; @@ -650,7 +679,7 @@ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { // X srem Y -> X urem Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateURem(Op0, Op1); + return BinaryOperator::CreateURem(Op0, Op1, I.getName()); } } @@ -661,14 +690,14 @@ bool hasNegative = false; for (unsigned i = 0; !hasNegative && i != VWidth; ++i) if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) 
- if (RHS->getValue().isNegative()) + if (RHS->isNegative()) hasNegative = true; if (hasNegative) { std::vector<Constant *> Elts(VWidth); for (unsigned i = 0; i != VWidth; ++i) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { - if (RHS->getValue().isNegative()) + if (RHS->isNegative()) Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); else Elts[i] = RHS; @@ -688,6 +717,14 @@ } Instruction *InstCombiner::visitFRem(BinaryOperator &I) { - return commonRemTransforms(I); -} + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyFRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + + return 0; +}
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombinePHI.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombinePHI.cpp index 2f33bf3..664546c 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" @@ -27,25 +28,40 @@ Value *LHSVal = FirstInst->getOperand(0); Value *RHSVal = FirstInst->getOperand(1); - const Type *LHSType = LHSVal->getType(); - const Type *RHSType = RHSVal->getType(); + Type *LHSType = LHSVal->getType(); + Type *RHSType = RHSVal->getType(); + + bool isNUW = false, isNSW = false, isExact = false; + if (OverflowingBinaryOperator *BO = + dyn_cast<OverflowingBinaryOperator>(FirstInst)) { + isNUW = BO->hasNoUnsignedWrap(); + isNSW = BO->hasNoSignedWrap(); + } else if (PossiblyExactOperator *PEO = + dyn_cast<PossiblyExactOperator>(FirstInst)) + isExact = PEO->isExact(); // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); if (!I || I->getOpcode() != Opc || !I->hasOneUse() || // Verify type of the LHS matches so we don't fold cmp's of different - // types or GEP's with different index types. + // types. I->getOperand(0)->getType() != LHSType || I->getOperand(1)->getType() != RHSType) return 0; // If they are CmpInst instructions, check their predicates - if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) - if (cast<CmpInst>(I)->getPredicate() != - cast<CmpInst>(FirstInst)->getPredicate()) + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate()) return 0; + if (isNUW) + isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); + if (isNSW) + isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + if (isExact) + isExact = cast<PossiblyExactOperator>(I)->isExact(); + // Keep track of which operand needs a phi node. 
if (I->getOperand(0) != LHSVal) LHSVal = 0; if (I->getOperand(1) != RHSVal) RHSVal = 0; @@ -64,16 +80,16 @@ Value *InRHS = FirstInst->getOperand(1); PHINode *NewLHS = 0, *NewRHS = 0; if (LHSVal == 0) { - NewLHS = PHINode::Create(LHSType); - NewLHS->reserveOperandSpace(PN.getNumOperands()/2); + NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), + FirstInst->getOperand(0)->getName() + ".pn"); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewLHS, PN); LHSVal = NewLHS; } if (RHSVal == 0) { - NewRHS = PHINode::Create(RHSType); - NewRHS->reserveOperandSpace(PN.getNumOperands()/2); + NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), + FirstInst->getOperand(1)->getName() + ".pn"); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewRHS, PN); RHSVal = NewRHS; @@ -94,11 +110,21 @@ } } - if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); - CmpInst *CIOp = cast<CmpInst>(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), - LHSVal, RHSVal); + if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) { + CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + LHSVal, RHSVal); + NewCI->setDebugLoc(FirstInst->getDebugLoc()); + return NewCI; + } + + BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst); + BinaryOperator *NewBinOp = + BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); + if (isNUW) NewBinOp->setHasNoUnsignedWrap(); + if (isNSW) NewBinOp->setHasNoSignedWrap(); + if (isExact) NewBinOp->setIsExact(); + NewBinOp->setDebugLoc(FirstInst->getDebugLoc()); + return NewBinOp; } Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { @@ -115,6 +141,8 @@ // especially bad when the PHIs are in the header of a loop. bool NeededPhi = false; + bool AllInBounds = true; + // Scan to see if all operands are the same opcode, and all have one use. 
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); @@ -122,6 +150,8 @@ GEP->getNumOperands() != FirstInst->getNumOperands()) return 0; + AllInBounds &= GEP->isInBounds(); + // Keep track of whether or not all GEPs are of alloca pointers. if (AllBasePointersAreAllocas && (!isa<AllocaInst>(GEP->getOperand(0)) || @@ -174,10 +204,10 @@ for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { if (FixedOperands[i]) continue; // operand doesn't need a phi. Value *FirstOp = FirstInst->getOperand(i); - PHINode *NewPN = PHINode::Create(FirstOp->getType()); + PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, + FirstOp->getName()+".pn"); InsertNewInstBefore(NewPN, PN); - NewPN->reserveOperandSpace(e); NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[i] = NewPN; FixedOperands[i] = NewPN; @@ -198,11 +228,11 @@ } Value *Base = FixedOperands[0]; - return cast<GEPOperator>(FirstInst)->isInBounds() ? - GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, - FixedOperands.end()) : - GetElementPtrInst::Create(Base, FixedOperands.begin()+1, - FixedOperands.end()); + GetElementPtrInst *NewGEP = + GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); + if (AllInBounds) NewGEP->setIsInBounds(); + NewGEP->setDebugLoc(FirstInst->getDebugLoc()); + return NewGEP; } @@ -211,7 +241,7 @@ /// obvious the value of the load is not changed from the point of the load to /// the end of the block it is in. /// -/// Finally, it is safe, but not profitable, to sink a load targetting a +/// Finally, it is safe, but not profitable, to sink a load targeting a /// non-address-taken alloca. Doing so will cause us to not promote the alloca /// to a register. 
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { @@ -256,7 +286,12 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); - + + // FIXME: This is overconservative; this transform is allowed in some cases + // for atomic operations. + if (FirstLI->isAtomic()) + return 0; + // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. We currently // don't sink loads when some have their alignment specified and some don't. @@ -310,8 +345,9 @@ // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType()); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); + PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), + PN.getNumIncomingValues(), + PN.getName()+".in"); Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -342,7 +378,9 @@ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); - return new LoadInst(PhiVal, isVolatile, LoadAlignment); + LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment); + NewLI->setDebugLoc(FirstLI->getDebugLoc()); + return NewLI; } @@ -363,7 +401,8 @@ // the same type or "+42") we can pull the operation through the PHI, reducing // code size and simplifying code. 
Constant *ConstantOp = 0; - const Type *CastSrcTy = 0; + Type *CastSrcTy = 0; + bool isNUW = false, isNSW = false, isExact = false; if (isa<CastInst>(FirstInst)) { CastSrcTy = FirstInst->getOperand(0)->getType(); @@ -380,6 +419,14 @@ ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); if (ConstantOp == 0) return FoldPHIArgBinOpIntoPHI(PN); + + if (OverflowingBinaryOperator *BO = + dyn_cast<OverflowingBinaryOperator>(FirstInst)) { + isNUW = BO->hasNoUnsignedWrap(); + isNSW = BO->hasNoSignedWrap(); + } else if (PossiblyExactOperator *PEO = + dyn_cast<PossiblyExactOperator>(FirstInst)) + isExact = PEO->isExact(); } else { return 0; // Cannot fold this operation. } @@ -395,12 +442,20 @@ } else if (I->getOperand(1) != ConstantOp) { return 0; } + + if (isNUW) + isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); + if (isNSW) + isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + if (isExact) + isExact = cast<PossiblyExactOperator>(I)->isExact(); } // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType()); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); + PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), + PN.getNumIncomingValues(), + PN.getName()+".in"); Value *InVal = FirstInst->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -425,15 +480,27 @@ } // Insert and return the new operation. 
- if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) - return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType()); + if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) { + CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal, + PN.getType()); + NewCI->setDebugLoc(FirstInst->getDebugLoc()); + return NewCI; + } - if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); + if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) { + BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); + if (isNUW) BinOp->setHasNoUnsignedWrap(); + if (isNSW) BinOp->setHasNoSignedWrap(); + if (isExact) BinOp->setIsExact(); + BinOp->setDebugLoc(FirstInst->getDebugLoc()); + return BinOp; + } CmpInst *CIOp = cast<CmpInst>(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), - PhiVal, ConstantOp); + CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + PhiVal, ConstantOp); + NewCI->setDebugLoc(FirstInst->getDebugLoc()); + return NewCI; } /// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle @@ -509,7 +576,7 @@ unsigned Shift; // The amount shifted. unsigned Width; // The width extracted. - LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty) + LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} // Ctor form used by DenseMap. @@ -568,6 +635,23 @@ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { PHINode *PN = PHIsToSlice[PHIId]; + // Scan the input list of the PHI. If any input is an invoke, and if the + // input is defined in the predecessor, then we won't be able to split the critical + // edge which is required to insert a truncate. Because of this, we have to + // bail out.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); + if (II == 0) continue; + if (II->getParent() != PN->getIncomingBlock(i)) + continue; + + // If we have an invoke, and if it's directly in the predecessor, then we have + // a critical edge where we need to put the truncate. Since we can't + // split the edge in instcombine, we have to bail out. + return 0; + } + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); @@ -621,7 +705,7 @@ unsigned PHIId = PHIUsers[UserI].PHIId; PHINode *PN = PHIsToSlice[PHIId]; unsigned Offset = PHIUsers[UserI].Shift; - const Type *Ty = PHIUsers[UserI].Inst->getType(); + Type *Ty = PHIUsers[UserI].Inst->getType(); PHINode *EltPHI; @@ -630,7 +714,8 @@ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { // Otherwise, Create the new PHI node for this user. - EltPHI = PHINode::Create(Ty, PN); + EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), + PN->getName()+".off"+Twine(Offset), PN); assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); @@ -667,8 +752,8 @@ Value *Res = InVal; if (Offset) Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), - Offset)); - Res = Builder->CreateTrunc(Res, Ty); + Offset), "extract"); + Res = Builder->CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); @@ -707,10 +792,7 @@ // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - // If LCSSA is around, don't mess with Phi nodes - if (MustPreserveLCSSA) return 0; - - if (Value *V = PN.hasConstantValue()) + if (Value *V = SimplifyInstruction(&PN, TD)) return ReplaceInstUsesWith(PN, V); // If all PHI operands are the same operation, pull them through the PHI, @@ -757,18 +839,18 @@ // quick check to see if the PHI node only contains a single non-phi value, if // so, scan to see if the phi
cycle is actually equal to that value. { - unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); + unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues(); // Scan for the first non-phi operand. - while (InValNo != NumOperandVals && + while (InValNo != NumIncomingVals && isa<PHINode>(PN.getIncomingValue(InValNo))) ++InValNo; - if (InValNo != NumOperandVals) { - Value *NonPhiInVal = PN.getOperand(InValNo); + if (InValNo != NumIncomingVals) { + Value *NonPhiInVal = PN.getIncomingValue(InValNo); // Scan the rest of the operands to see if there are any conflicts, if so // there is no need to recursively scan other phis. - for (++InValNo; InValNo != NumOperandVals; ++InValNo) { + for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { Value *OpVal = PN.getIncomingValue(InValNo); if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) break; @@ -777,7 +859,7 @@ // If we scanned over all operands, then we have one unique value plus // phi values. Scan PHI nodes to see if they all merge in each other or // the value. - if (InValNo == NumOperandVals) { + if (InValNo == NumIncomingVals) { SmallPtrSet<PHINode*, 16> ValueEqualPHIs; if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) return ReplaceInstUsesWith(PN, NonPhiInVal);
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineSelect.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineSelect.cpp index d8f3f4e..91e60a4 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" using namespace llvm; using namespace PatternMatch; @@ -24,14 +25,14 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); if (SI == 0) return SPF_UNKNOWN; - + ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); if (ICI == 0) return SPF_UNKNOWN; - + LHS = ICI->getOperand(0); RHS = ICI->getOperand(1); - - // (icmp X, Y) ? X : Y + + // (icmp X, Y) ? X : Y if (SI->getTrueValue() == ICI->getOperand(0) && SI->getFalseValue() == ICI->getOperand(1)) { switch (ICI->getPredicate()) { @@ -46,8 +47,8 @@ case ICmpInst::ICMP_SLE: return SPF_SMIN; } } - - // (icmp X, Y) ? Y : X + + // (icmp X, Y) ? Y : X if (SI->getTrueValue() == ICI->getOperand(1) && SI->getFalseValue() == ICI->getOperand(0)) { switch (ICI->getPredicate()) { @@ -62,9 +63,9 @@ case ICmpInst::ICMP_SLE: return SPF_SMAX; } } - + // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) - + return SPF_UNKNOWN; } @@ -133,10 +134,9 @@ } // Fold this by inserting a select from the input values. - SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), - FI->getOperand(0)); - InsertNewInstBefore(NewSI, SI); - return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, + Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0), + FI->getOperand(0), SI.getName()+".v"); + return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, TI->getType()); } @@ -174,9 +174,8 @@ } // If we reach here, they do have operations in common. 
- SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT, - OtherOpF); - InsertNewInstBefore(NewSI, SI); + Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT, + OtherOpF, SI.getName()+".v"); if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) { if (MatchIsOpZero) @@ -195,7 +194,10 @@ ConstantInt *C2I = dyn_cast<ConstantInt>(C2); if (!C2I) return false; - return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne()); + if (!C1I->isZero() && !C2I->isZero()) // One side must be zero. + return false; + return C1I->isOne() || C1I->isAllOnesValue() || + C2I->isOne() || C2I->isAllOnesValue(); } /// FoldSelectIntoOp - Try fold the select into one of the operands to @@ -211,7 +213,7 @@ unsigned OpToFold = 0; if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { + } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { OpToFold = 2; } @@ -219,14 +221,20 @@ Constant *C = GetSelectFoldableConstant(TVI); Value *OOp = TVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting - // between 0 and 1. + // between 0, 1 and -1. 
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { - Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); - InsertNewInstBefore(NewSel, SI); + Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C); NewSel->takeName(TVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI)) - return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI); + BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(), + FalseVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(TVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -240,7 +248,7 @@ unsigned OpToFold = 0; if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { + } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { OpToFold = 2; } @@ -248,14 +256,20 @@ Constant *C = GetSelectFoldableConstant(FVI); Value *OOp = FVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting - // between 0 and 1. + // between 0, 1 and -1. 
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { - Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); - InsertNewInstBefore(NewSel, SI); + Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp); NewSel->takeName(FVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI)) - return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI); + BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(), + TrueVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(FVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -265,6 +279,64 @@ return 0; } +/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is +/// replaced with RepOp. +static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const TargetData *TD) { + // Trivial replacement. + if (V == Op) + return RepOp; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return 0; + + // If this is a binary operator, try to simplify it with the replaced op. + if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { + if (B->getOperand(0) == Op) + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD); + if (B->getOperand(1) == Op) + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD); + } + + // Same for CmpInsts. + if (CmpInst *C = dyn_cast<CmpInst>(I)) { + if (C->getOperand(0) == Op) + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD); + if (C->getOperand(1) == Op) + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD); + } + + // TODO: We could hand off more cases to instsimplify here. + + // If all operands are constant after substituting Op for RepOp then we can + // constant fold the instruction. 
+ if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) { + // Build a list of all constant operands. + SmallVector<Constant*, 8> ConstOps; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (I->getOperand(i) == Op) + ConstOps.push_back(CRepOp); + else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i))) + ConstOps.push_back(COp); + else + break; + } + + // All operands were constants, fold it. + if (ConstOps.size() == I->getNumOperands()) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), + ConstOps, TD); + } + } + + return 0; +} + /// visitSelectInstWithICmp - Visit a SelectInst that has an /// ICmpInst as its first operand. /// @@ -278,52 +350,95 @@ Value *FalseVal = SI.getFalseValue(); // Check cases where the comparison is with a constant that - // can be adjusted to fit the min/max idiom. We may edit ICI in - // place here, so make sure the select is the only user. + // can be adjusted to fit the min/max idiom. We may move or edit ICI + // here, so make sure the select is the only user. if (ICI->hasOneUse()) if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) { + // X < MIN ? T : F --> F + if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT) + && CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) + return ReplaceInstUsesWith(SI, FalseVal); + // X > MAX ? T : F --> F + else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT) + && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) + return ReplaceInstUsesWith(SI, FalseVal); switch (Pred) { default: break; case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: { - // X < MIN ? T : F --> F - if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) - return ReplaceInstUsesWith(SI, FalseVal); - // X < C ? X : C-1 --> X > C-1 ? 
C-1 : X - Constant *AdjustedRHS = - ConstantInt::get(CI->getContext(), CI->getValue()-1); - if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } - break; - } + case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: { - // X > MAX ? T : F --> F - if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) - return ReplaceInstUsesWith(SI, FalseVal); + // These transformations only work for selects over integers. + IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType()); + if (!SelectTy) + break; + + Constant *AdjustedRHS; + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT) + AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1); + else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) + AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1); + // X > C ? X : C+1 --> X < C+1 ? C+1 : X - Constant *AdjustedRHS = - ConstantInt::get(CI->getContext(), CI->getValue()+1); + // X < C ? X : C-1 --> X > C-1 ? C-1 : X if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } + (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) + ; // Nothing to do here. Values match without any sign/zero extension. + + // Types do not match. Instead of calculating this with mixed types + // promote all to the larger type. This enables scalar evolution to + // analyze this expression. 
+ else if (CmpRHS->getType()->getScalarSizeInBits() + < SelectTy->getBitWidth()) { + Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy); + + // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X + // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X + // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X + // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X + if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) && + sextRHS == FalseVal) { + CmpLHS = TrueVal; + AdjustedRHS = sextRHS; + } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) && + sextRHS == TrueVal) { + CmpLHS = FalseVal; + AdjustedRHS = sextRHS; + } else if (ICI->isUnsigned()) { + Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy); + // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X + // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X + // zext + signed compare cannot be changed: + // 0xff <s 0x00, but 0x00ff >s 0x0000 + if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) && + zextRHS == FalseVal) { + CmpLHS = TrueVal; + AdjustedRHS = zextRHS; + } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) && + zextRHS == TrueVal) { + CmpLHS = FalseVal; + AdjustedRHS = zextRHS; + } else + break; + } else + break; + } else + break; + + Pred = ICmpInst::getSwappedPredicate(Pred); + CmpRHS = AdjustedRHS; + std::swap(FalseVal, TrueVal); + ICI->setPredicate(Pred); + ICI->setOperand(0, CmpLHS); + ICI->setOperand(1, CmpRHS); + SI.setOperand(1, TrueVal); + SI.setOperand(2, FalseVal); + + // Move ICI instruction right before the select instruction. Otherwise + // the sext/zext value may be defined after the ICI instruction uses it. + ICI->moveBefore(&SI); + + Changed = true; break; } } @@ -334,7 +449,7 @@ // FIXME: Type and constness constraints could be lifted, but we have to // watch code size carefully. We should consider xor instead of // sub/add when we decide to do that. 
- if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { + if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { ConstantInt *C1 = NULL, *C2 = NULL; @@ -360,24 +475,39 @@ } } - if (CmpLHS == TrueVal && CmpRHS == FalseVal) { - // Transform (X == Y) ? X : Y -> Y - if (Pred == ICmpInst::ICMP_EQ) + // If we have an equality comparison then we know the value in one of the + // arms of the select. See if substituting this value into the arm and + // simplifying the result yields the same value as the other arm. + if (Pred == ICmpInst::ICMP_EQ) { + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) return ReplaceInstUsesWith(SI, FalseVal); - // Transform (X != Y) ? X : Y -> X - if (Pred == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(SI, TrueVal); - /// NOTE: if we wanted to, this is where to detect integer MIN/MAX - - } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) { - // Transform (X == Y) ? Y : X -> X - if (Pred == ICmpInst::ICMP_EQ) + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) return ReplaceInstUsesWith(SI, FalseVal); - // Transform (X != Y) ? 
Y : X -> Y - if (Pred == ICmpInst::ICMP_NE) + } else if (Pred == ICmpInst::ICMP_NE) { + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) return ReplaceInstUsesWith(SI, TrueVal); - /// NOTE: if we wanted to, this is where to detect integer MIN/MAX + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + return ReplaceInstUsesWith(SI, TrueVal); } + + // NOTE: if we wanted to, this is where to detect integer MIN/MAX + + if (isa<Constant>(CmpRHS)) { + if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { + // Transform (X == C) ? X : Y -> (X == C) ? C : Y + SI.setOperand(1, CmpRHS); + Changed = true; + } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) { + // Transform (X != C) ? Y : X -> (X != C) ? Y : C + SI.setOperand(2, CmpRHS); + Changed = true; + } + } + return Changed ? &SI : 0; } @@ -399,28 +529,28 @@ // can always be mapped. const Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return true; - + // If V is a PHI node defined in the same block as the condition PHI, we can // map the arguments. const PHINode *CondPHI = cast<PHINode>(SI.getCondition()); - + if (const PHINode *VP = dyn_cast<PHINode>(I)) if (VP->getParent() == CondPHI->getParent()) return true; - + // Otherwise, if the PHI and select are defined in the same block and if V is // defined in a different block, then we can transform it. if (SI.getParent() == CondPHI->getParent() && I->getParent() != CondPHI->getParent()) return true; - + // Otherwise we have a 'hard' case and we can't tell without doing more // detailed dominator based analysis, punt. return false; } /// FoldSPFofSPF - We have an SPF (e.g. 
a min or max) of an SPF of the form: -/// SPF2(SPF1(A, B), C) +/// SPF2(SPF1(A, B), C) Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, Value *A, Value *B, @@ -431,7 +561,7 @@ // MIN(MIN(a, b), a) -> MIN(a, b) if (SPF1 == SPF2) return ReplaceInstUsesWith(Outer, Inner); - + // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || @@ -440,13 +570,81 @@ (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) return ReplaceInstUsesWith(Outer, C); } - + // TODO: MIN(MIN(A, 23), 97) return 0; } +/// foldSelectICmpAnd - If one of the constants is zero (we know they can't +/// both be) and we have an icmp instruction with zero, and we have an 'and' +/// with the non-constant value and a power of two we can turn the select +/// into a shift on the result of the 'and'. +static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, + ConstantInt *FalseVal, + InstCombiner::BuilderTy *Builder) { + const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); + if (!IC || !IC->isEquality()) + return 0; + if (!match(IC->getOperand(1), m_Zero())) + return 0; + + ConstantInt *AndRHS; + Value *LHS = IC->getOperand(0); + if (LHS->getType() != SI.getType() || + !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS)))) + return 0; + + // If both select arms are non-zero see if we have a select of the form + // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic + // for 'x ? 2^n : 0' and fix the thing up at the end. + ConstantInt *Offset = 0; + if (!TrueVal->isZero() && !FalseVal->isZero()) { + if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) + Offset = FalseVal; + else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) + Offset = TrueVal; + else + return 0; + + // Adjust TrueVal and FalseVal to the offset. 
+ TrueVal = ConstantInt::get(Builder->getContext(), + TrueVal->getValue() - Offset->getValue()); + FalseVal = ConstantInt::get(Builder->getContext(), + FalseVal->getValue() - Offset->getValue()); + } + + // Make sure the mask in the 'and' and one of the select arms is a power of 2. + if (!AndRHS->getValue().isPowerOf2() || + (!TrueVal->getValue().isPowerOf2() && + !FalseVal->getValue().isPowerOf2())) + return 0; + + // Determine which shift is needed to transform result of the 'and' into the + // desired result. + ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal; + unsigned ValZeros = ValC->getValue().logBase2(); + unsigned AndZeros = AndRHS->getValue().logBase2(); + + Value *V = LHS; + if (ValZeros > AndZeros) + V = Builder->CreateShl(V, ValZeros - AndZeros); + else if (ValZeros < AndZeros) + V = Builder->CreateLShr(V, AndZeros - ValZeros); + + // Okay, now we know that everything is set up, we just don't know whether we + // have a icmp_ne or icmp_eq and whether the true or false val is the zero. + bool ShouldNotVal = !TrueVal->isZero(); + ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; + if (ShouldNotVal) + V = Builder->CreateXor(V, ValC); + + // Apply an offset if needed. 
+ if (Offset) + V = Builder->CreateAdd(V, Offset); + return V; +} Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); @@ -463,8 +661,7 @@ return BinaryOperator::CreateOr(CondVal, FalseVal); } // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal), SI); + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { if (C->getZExtValue() == false) { @@ -472,11 +669,10 @@ return BinaryOperator::CreateAnd(CondVal, TrueVal); } // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal), SI); + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return BinaryOperator::CreateOr(NotCond, TrueVal); } - + // select a, b, a -> a&b // select a, a, b -> a|b if (CondVal == TrueVal) @@ -495,44 +691,21 @@ // select C, -1, 0 -> sext C to int if (FalseValC->isZero() && TrueValC->isAllOnesValue()) return new SExtInst(CondVal, SI.getType()); - + // select C, 0, 1 -> zext !C to int if (TrueValC->isZero() && FalseValC->getValue() == 1) { - Value *NotCond = Builder->CreateNot(CondVal); + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return new ZExtInst(NotCond, SI.getType()); } // select C, 0, -1 -> sext !C to int if (TrueValC->isZero() && FalseValC->isAllOnesValue()) { - Value *NotCond = Builder->CreateNot(CondVal); + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return new SExtInst(NotCond, SI.getType()); } - - if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) { - // If one of the constants is zero (we know they can't both be) and we - // have an icmp instruction with zero, and we have an 'and' with the - // non-constant value, eliminate this whole mess. 
This corresponds to - // cases like this: ((X & 27) ? 27 : 0) - if (TrueValC->isZero() || FalseValC->isZero()) - if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) && - cast<Constant>(IC->getOperand(1))->isNullValue()) - if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0))) - if (ICA->getOpcode() == Instruction::And && - isa<ConstantInt>(ICA->getOperand(1)) && - (ICA->getOperand(1) == TrueValC || - ICA->getOperand(1) == FalseValC) && - cast<ConstantInt>(ICA->getOperand(1))->getValue().isPowerOf2()) { - // Okay, now we know that everything is set up, we just don't - // know whether we have a icmp_ne or icmp_eq and whether the - // true or false val is the zero. - bool ShouldNotVal = !TrueValC->isZero(); - ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; - Value *V = ICA; - if (ShouldNotVal) - V = Builder->CreateXor(V, ICA->getOperand(1)); - return ReplaceInstUsesWith(SI, V); - } - } + + if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder)) + return ReplaceInstUsesWith(SI, V); } // See if we are selecting two values based on a comparison of the two values. @@ -540,7 +713,7 @@ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { // Transform (X == Y) ? X : Y -> Y if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -552,7 +725,7 @@ } // Transform (X une Y) ? X : Y -> X if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -567,7 +740,7 @@ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ // Transform (X == Y) ? 
Y : X -> X if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -579,7 +752,7 @@ } // Transform (X une Y) ? Y : X -> Y if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { - // This is not safe in general for floating point: + // This is not safe in general for floating point: // consider X== -0, Y== +0. // It becomes safe if either operand is a nonzero constant. ConstantFP *CFPt, *CFPf; @@ -635,23 +808,24 @@ // So at this point we know we have (Y -> OtherAddOp): // select C, (add X, Y), (sub X, Z) Value *NegVal; // Compute -Z - if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) { - NegVal = ConstantExpr::getNeg(C); + if (SI.getType()->isFPOrFPVectorTy()) { + NegVal = Builder->CreateFNeg(SubOp->getOperand(1)); } else { - NegVal = InsertNewInstBefore( - BinaryOperator::CreateNeg(SubOp->getOperand(1)), SI); + NegVal = Builder->CreateNeg(SubOp->getOperand(1)); } Value *NewTrueOp = OtherAddOp; Value *NewFalseOp = NegVal; if (AddOp != TI) std::swap(NewTrueOp, NewFalseOp); - Instruction *NewSel = - SelectInst::Create(CondVal, NewTrueOp, - NewFalseOp); + Value *NewSel = + Builder->CreateSelect(CondVal, NewTrueOp, + NewFalseOp, SI.getName() + ".p"); - NewSel = InsertNewInstBefore(NewSel, SI); - return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); + if (SI.getType()->isFPOrFPVectorTy()) + return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel); + else + return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); } } } @@ -660,7 +834,7 @@ if (SI.getType()->isIntegerTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; - + // MAX(MAX(a, b), a) -> MAX(a, b) // MIN(MIN(a, b), a) -> MIN(a, b) // MAX(MIN(a, b), a) -> a @@ -683,13 +857,26 @@ } // See if we can fold the select into a phi node if the 
condition is a select. - if (isa<PHINode>(SI.getCondition())) + if (isa<PHINode>(SI.getCondition())) // The true/false values have to be live in the PHI predecessor's blocks. if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) if (Instruction *NV = FoldOpIntoPhi(SI)) return NV; + if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { + if (TrueSI->getCondition() == CondVal) { + SI.setOperand(1, TrueSI->getTrueValue()); + return &SI; + } + } + if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { + if (FalseSI->getCondition() == CondVal) { + SI.setOperand(2, FalseSI->getFalseValue()); + return &SI; + } + } + if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); SI.setOperand(1, FalseVal);
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineShifts.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineShifts.cpp index 1fc73d6..6d85add 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -13,6 +13,8 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -21,25 +23,6 @@ assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // shl X, 0 == X and shr X, 0 == X - // shl 0, X == 0 and shr 0, X == 0 - if (Op1 == Constant::getNullValue(Op1->getType()) || - Op0 == Constant::getNullValue(Op0->getType())) - return ReplaceInstUsesWith(I, Op0); - - if (isa<UndefValue>(Op0)) { - if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef - return ReplaceInstUsesWith(I, Op0); - else // undef << X -> 0, undef >>u X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - if (isa<UndefValue>(Op1)) { - if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X - return ReplaceInstUsesWith(I, Op0); - else // X << undef, X >>u undef -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - // See if we can fold away this shift. if (SimplifyDemandedInstructionBits(I)) return &I; @@ -53,13 +36,295 @@ if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; + + // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2. + // Because shifts by negative values (which could occur if A were negative) + // are undefined. + Value *A; const APInt *B; + if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) { + // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't + // demand the sign bit (and many others) here?? 
+ Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1), + Op1->getName()); + I.setOperand(1, Rem); + return &I; + } + return 0; } +/// CanEvaluateShifted - See if we can compute the specified value, but shifted +/// logically to the left or right by some number of bits. This should return +/// true if the expression can be computed for the same cost as the current +/// expression tree. This is used to eliminate extraneous shifting from things +/// like: +/// %C = shl i128 %A, 64 +/// %D = shl i128 %B, 96 +/// %E = or i128 %C, %D +/// %F = lshr i128 %E, 64 +/// where the client will ask if E can be computed shifted right by 64-bits. If +/// this succeeds, the GetShiftedValue function will be called to produce the +/// value. +static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, + InstCombiner &IC) { + // We can always evaluate constants shifted. + if (isa<Constant>(V)) + return true; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // If this is the opposite shift, we can directly reuse the input of the shift + // if the needed bits are already zero in the input. This allows us to reuse + // the value which means that we don't care if the shift has multiple uses. + // TODO: Handle opposite shift by exact value. + ConstantInt *CI = 0; + if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) || + (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) { + if (CI->getZExtValue() == NumBits) { + // TODO: Check that the input bits are already zero with MaskedValueIsZero +#if 0 + // If this is a truncate of a logical shr, we can truncate it to a smaller + // lshr iff we know that the bits we would otherwise be shifting in are + // already zeros. 
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (MaskedValueIsZero(I->getOperand(0), + APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && + CI->getLimitedValue(BitWidth) < BitWidth) { + return CanEvaluateTruncated(I->getOperand(0), Ty); + } +#endif + + } + } + + // We can't mutate something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. + if (!I->hasOneUse()) return false; + + switch (I->getOpcode()) { + default: return false; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. + return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) && + CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC); + + case Instruction::Shl: { + // We can often fold the shift into shifts-by-a-constant. + CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). + if (isLeftShift) return true; + + // We can always turn shl(c)+shr(c) -> and(c2). + if (CI->getValue() == NumBits) return true; + + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + + // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't + // profitable unless we know the and'd out bits are already zero. + if (CI->getZExtValue() > NumBits) { + unsigned LowBits = TypeWidth - CI->getZExtValue(); + if (MaskedValueIsZero(I->getOperand(0), + APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) + return true; + } + + return false; + } + case Instruction::LShr: { + // We can often fold the shift into shifts-by-a-constant. + CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). + if (!isLeftShift) return true; + + // We can always turn lshr(c)+shl(c) -> and(c2). 
+ if (CI->getValue() == NumBits) return true; + + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + + // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't + // profitable unless we know the and'd out bits are already zero. + if (CI->getZExtValue() > NumBits) { + unsigned LowBits = CI->getZExtValue() - NumBits; + if (MaskedValueIsZero(I->getOperand(0), + APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) + return true; + } + + return false; + } + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) && + CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC); + } + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC)) + return false; + return true; + } + } +} + +/// GetShiftedValue - When CanEvaluateShifted returned true for an expression, +/// this value inserts the new computation that produces the shifted value. +static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, + InstCombiner &IC) { + // We can always evaluate constants shifted. + if (Constant *C = dyn_cast<Constant>(V)) { + if (isLeftShift) + V = IC.Builder->CreateShl(C, NumBits); + else + V = IC.Builder->CreateLShr(C, NumBits); + // If we got a constantexpr back, try to simplify it with TD info. 
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + V = ConstantFoldConstantExpression(CE, IC.getTargetData()); + return V; + } + + Instruction *I = cast<Instruction>(V); + IC.Worklist.Add(I); + + switch (I->getOpcode()) { + default: assert(0 && "Inconsistency with CanEvaluateShifted"); + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. + I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC)); + I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); + return I; + + case Instruction::Shl: { + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); + + // We only accept shifts-by-a-constant in CanEvaluateShifted. + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). + if (isLeftShift) { + // If this is oversized composite shift, then unsigned shifts get 0. + unsigned NewShAmt = NumBits+CI->getZExtValue(); + if (NewShAmt >= TypeWidth) + return Constant::getNullValue(I->getType()); + + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); + return I; + } + + // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have + // zeros. + if (CI->getValue() == NumBits) { + APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits)); + V = IC.Builder->CreateAnd(BO->getOperand(0), + ConstantInt::get(BO->getContext(), Mask)); + if (Instruction *VI = dyn_cast<Instruction>(V)) { + VI->moveBefore(BO); + VI->takeName(BO); + } + return V; + } + + // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that + // the and won't be needed. 
+ assert(CI->getZExtValue() > NumBits); + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); + return BO; + } + case Instruction::LShr: { + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); + // We only accept shifts-by-a-constant in CanEvaluateShifted. + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). + if (!isLeftShift) { + // If this is oversized composite shift, then unsigned shifts get 0. + unsigned NewShAmt = NumBits+CI->getZExtValue(); + if (NewShAmt >= TypeWidth) + return Constant::getNullValue(BO->getType()); + + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setIsExact(false); + return I; + } + + // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have + // zeros. + if (CI->getValue() == NumBits) { + APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits)); + V = IC.Builder->CreateAnd(I->getOperand(0), + ConstantInt::get(BO->getContext(), Mask)); + if (Instruction *VI = dyn_cast<Instruction>(V)) { + VI->moveBefore(I); + VI->takeName(I); + } + return V; + } + + // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that + // the and won't be needed. + assert(CI->getZExtValue() > NumBits); + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setIsExact(false); + return BO; + } + + case Instruction::Select: + I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); + I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC)); + return I; + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. 
+ PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), + NumBits, isLeftShift, IC)); + return PN; + } + } +} + + + Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; - + + + // See if we can propagate this shift into the input, this covers the trivial + // cast of lshr(shl(x,c1),c2) as well as other more complex cases. + if (I.getOpcode() != Instruction::AShr && + CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { + DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" + " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); + + return ReplaceInstUsesWith(I, + GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); + } + + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); @@ -103,7 +368,7 @@ // Okay, we'll do this xform. Make the shift of shift. Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) - Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt); + Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); // For logical shifts, the truncation has the effect of making the high // part of the register be zeros. Emulate this by inserting an AND to @@ -126,7 +391,8 @@ // shift1 & 0x00FF Value *And = Builder->CreateAnd(NSh, - ConstantInt::get(I.getContext(), MaskV)); + ConstantInt::get(I.getContext(), MaskV), + TI->getName()); // Return the value truncated to the interesting size. 
return new TruncInst(And, I.getType()); @@ -150,9 +416,10 @@ match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1); + Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1); + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); uint32_t Op1Val = Op1->getLimitedValue(TypeBits); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); @@ -166,9 +433,11 @@ m_ConstantInt(CC))) && cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1); + Builder->CreateShl(Op0BO->getOperand(0), Op1, + Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1)); + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } } @@ -180,9 +449,10 @@ match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1); + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS); + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); uint32_t Op1Val = Op1->getLimitedValue(TypeBits); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); @@ -196,9 +466,10 @@ cast<BinaryOperator>(Op0BO->getOperand(0)) ->getOperand(0)->hasOneUse()) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1); + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, 
ConstantExpr::getShl(CC, Op1)); + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } @@ -266,7 +537,7 @@ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. - const IntegerType *Ty = cast<IntegerType>(I.getType()); + IntegerType *Ty = cast<IntegerType>(I.getType()); // Check for (X << c1) << c2 and (X >> c1) >> c2 if (I.getOpcode() == ShiftOp->getOpcode()) { @@ -282,39 +553,17 @@ ConstantInt::get(Ty, AmtSum)); } - if (ShiftOp->getOpcode() == Instruction::LShr && - I.getOpcode() == Instruction::AShr) { - if (AmtSum >= TypeBits) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. - return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); - } - - if (ShiftOp->getOpcode() == Instruction::AShr && - I.getOpcode() == Instruction::LShr) { - // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. - if (AmtSum >= TypeBits) - AmtSum = TypeBits-1; - - Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); - - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(), Mask)); - } - - // Okay, if we get here, one shift must be left, and the other shift must be - // right. See if the amounts are equal. if (ShiftAmt1 == ShiftAmt2) { // If we have ((X >>? C) << C), turn this into X & (-1 << C). - if (I.getOpcode() == Instruction::Shl) { + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),Mask)); } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). 
- if (I.getOpcode() == Instruction::LShr) { + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), Mask)); @@ -323,7 +572,8 @@ uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); @@ -334,7 +584,8 @@ } // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::Shl); Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); @@ -349,9 +600,8 @@ uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; // (X >>? C1) << C2 --> X >>? 
(C1-C2) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::LShr || - ShiftOp->getOpcode() == Instruction::AShr); + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, ConstantInt::get(Ty, ShiftDiff)); @@ -361,8 +611,8 @@ } // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { - assert(ShiftOp->getOpcode() == Instruction::Shl); + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); @@ -377,16 +627,56 @@ } Instruction *InstCombiner::visitShl(BinaryOperator &I) { - return commonShiftTransforms(I); + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), + I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + TD)) + return ReplaceInstUsesWith(I, V); + + if (Instruction *V = commonShiftTransforms(I)) + return V; + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) { + unsigned ShAmt = Op1C->getZExtValue(); + + // If the shifted-out value is known-zero, then this is a NUW shift. + if (!I.hasNoUnsignedWrap() && + MaskedValueIsZero(I.getOperand(0), + APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) { + I.setHasNoUnsignedWrap(); + return &I; + } + + // If the shifted out value is all signbits, this is a NSW shift. 
+ if (!I.hasNoSignedWrap() && + ComputeNumSignBits(I.getOperand(0)) > ShAmt) { + I.setHasNoSignedWrap(); + return &I; + } + } + + // (C1 << A) << C2 -> (C1 << C2) << A + Constant *C1, *C2; + Value *A; + if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) && + match(I.getOperand(1), m_Constant(C2))) + return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); + + return 0; } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), + I.isExact(), TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *R = commonShiftTransforms(I)) return R; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + unsigned ShAmt = Op1C->getZExtValue(); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) { unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); // ctlz.i32(x)>>5 --> zext(x == 0) @@ -395,7 +685,7 @@ if ((II->getIntrinsicID() == Intrinsic::ctlz || II->getIntrinsicID() == Intrinsic::cttz || II->getIntrinsicID() == Intrinsic::ctpop) && - isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){ + isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) { bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop; Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0); Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS); @@ -403,29 +693,37 @@ } } + // If the shifted-out value is known-zero, then this is an exact shift. 
+ if (!I.isExact() && + MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ + I.setIsExact(); + return &I; + } + } + return 0; } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), + I.isExact(), TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *R = commonShiftTransforms(I)) return R; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) { - // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) - if (CSI->isAllOnesValue()) - return ReplaceInstUsesWith(I, CSI); - } - + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + unsigned ShAmt = Op1C->getZExtValue(); + // If the input is a SHL by the same constant (ashr (shl X, C), C), then we // have a sign-extend idiom. Value *X; if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) { - // If the input value is known to already be sign extended enough, delete - // the extension. - if (ComputeNumSignBits(X) > Op1C->getZExtValue()) + // If the left shift is just shifting out partial signbits, delete the + // extension. + if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) return ReplaceInstUsesWith(I, X); // If the input is an extension from the shifted amount value, e.g. @@ -440,6 +738,13 @@ return new SExtInst(ZI->getOperand(0), ZI->getType()); } } + + // If the shifted-out value is known-zero, then this is an exact shift. + if (!I.isExact() && + MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ + I.setIsExact(); + return &I; + } } // See if we can turn a signed shr into an unsigned shr.
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 02609cb..5cd9a4b 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -34,7 +34,7 @@ if (!OpC) return false; // If there are no bits set that aren't demanded, nothing to do. - Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); + Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); if ((~Demanded & OpC->getValue()) == 0) return false; @@ -103,7 +103,7 @@ assert(V != 0 && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); - const Type *VTy = V->getType(); + Type *VTy = V->getType(); assert((TD || !VTy->isPointerTy()) && "SimplifyDemandedBits needs to know bit widths!"); assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && @@ -121,13 +121,13 @@ } if (isa<ConstantPointerNull>(V)) { // We know all of the bits for a constant! - KnownOne.clear(); + KnownOne.clearAllBits(); KnownZero = DemandedMask; return 0; } - KnownZero.clear(); - KnownOne.clear(); + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); if (DemandedMask == 0) { // Not demanding any bits from V. if (isa<UndefValue>(V)) return 0; @@ -311,8 +311,9 @@ // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1)); - return InsertNewInstBefore(Or, *I); + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstWith(Or, *I); } // If all of the demanded bits on one side are known, and all of the set @@ -324,9 +325,8 @@ if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { Constant *AndC = Constant::getIntegerValue(VTy, ~RHSKnownOne & DemandedMask); - Instruction *And = - BinaryOperator::CreateAnd(I->getOperand(0), AndC); - return InsertNewInstBefore(And, *I); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + return InsertNewInstWith(And, *I); } } @@ -350,15 +350,13 @@ Constant *AndC = ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); - Instruction *NewAnd = - BinaryOperator::CreateAnd(I->getOperand(0), AndC); - InsertNewInstBefore(NewAnd, *I); + Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + InsertNewInstWith(NewAnd, *I); Constant *XorC = ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); - Instruction *NewXor = - BinaryOperator::CreateXor(NewAnd, XorC); - return InsertNewInstBefore(NewXor, *I); + Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); + return InsertNewInstWith(NewXor, *I); } // Output known-0 bits are known if clear or set in both the LHS & RHS. 
@@ -387,15 +385,15 @@ break; case Instruction::Trunc: { unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask.zext(truncBf); - KnownZero.zext(truncBf); - KnownOne.zext(truncBf); + DemandedMask = DemandedMask.zext(truncBf); + KnownZero = KnownZero.zext(truncBf); + KnownOne = KnownOne.zext(truncBf); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) return I; - DemandedMask.trunc(BitWidth); - KnownZero.trunc(BitWidth); - KnownOne.trunc(BitWidth); + DemandedMask = DemandedMask.trunc(BitWidth); + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; } @@ -403,8 +401,8 @@ if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) return 0; // vector->int or fp->int? - if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { - if (const VectorType *SrcVTy = + if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { + if (VectorType *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { if (DstVTy->getNumElements() != SrcVTy->getNumElements()) // Don't touch a bitcast between vectors of different element counts. @@ -425,15 +423,15 @@ // Compute the bits in the result that are not present in the input. 
unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask.trunc(SrcBitWidth); - KnownZero.trunc(SrcBitWidth); - KnownOne.trunc(SrcBitWidth); + DemandedMask = DemandedMask.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) return I; - DemandedMask.zext(BitWidth); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + DemandedMask = DemandedMask.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // The top bits are known to be zero. KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); @@ -450,17 +448,17 @@ // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. if ((NewBits & DemandedMask) != 0) - InputDemandedBits.set(SrcBitWidth-1); + InputDemandedBits.setBit(SrcBitWidth-1); - InputDemandedBits.trunc(SrcBitWidth); - KnownZero.trunc(SrcBitWidth); - KnownOne.trunc(SrcBitWidth); + InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero, KnownOne, Depth+1)) return I; - InputDemandedBits.zext(BitWidth); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + InputDemandedBits = InputDemandedBits.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the @@ -470,8 +468,8 @@ // convert this into a zero extension. 
if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { // Convert to ZExt cast - CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy); - return InsertNewInstBefore(NewCast, *I); + CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); + return InsertNewInstWith(NewCast, *I); } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set KnownOne |= NewBits; } @@ -512,8 +510,9 @@ // Turn it into OR if input bits are zero. if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1)); - return InsertNewInstBefore(Or, *I); + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstWith(Or, *I); } // We can say something about the output known-zero and known-one bits, @@ -574,8 +573,16 @@ break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); + + // If the shift is NUW/NSW, then it does demand the high bits. + ShlOperator *IOp = cast<ShlOperator>(I); + if (IOp->hasNoSignedWrap()) + DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); + else if (IOp->hasNoUnsignedWrap()) + DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -590,10 +597,16 @@ case Instruction::LShr: // For a logical shift right if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Unsigned shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). 
+ if (cast<LShrOperator>(I)->isExact()) + DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -615,8 +628,8 @@ if (DemandedMask == 1) { // Perform the logical shift right. Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), I->getOperand(1)); - return InsertNewInstBefore(NewVal, *I); + I->getOperand(0), I->getOperand(1), I->getName()); + return InsertNewInstWith(NewVal, *I); } // If the sign bit is the only bit demanded by this ashr, then there is no @@ -625,14 +638,20 @@ return I->getOperand(0); if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); + uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); // If any of the "high bits" are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) - DemandedMaskIn.set(BitWidth-1); + DemandedMaskIn.setBit(BitWidth-1); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (cast<AShrOperator>(I)->isExact()) + DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -653,8 +672,8 @@ (HighBits & ~DemandedMask) == HighBits) { // Perform the logical shift right. Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), SA); - return InsertNewInstBefore(NewVal, *I); + I->getOperand(0), SA, I->getName()); + return InsertNewInstWith(NewVal, *I); } else if ((KnownOne & SignBit) != 0) { // New bits are known one. 
KnownOne |= HighBits; } @@ -662,6 +681,10 @@ break; case Instruction::SRem: if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + // X % -1 demands all the bits because we don't want to introduce + // INT_MIN % -1 (== undef) by accident. + if (Rem->isAllOnesValue()) + break; APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { if (DemandedMask.ult(RA)) // srem won't affect demanded bits @@ -690,6 +713,18 @@ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); } } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. + if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { + APInt Mask2 = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, + Depth+1); + // If it's known zero, our sign bit is also zero. + if (LHSKnownZero.isNegative()) + KnownZero |= LHSKnownZero; + } break; case Instruction::URem: { APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); @@ -736,12 +771,16 @@ NewVal = BinaryOperator::CreateShl(II->getArgOperand(0), ConstantInt::get(I->getType(), ResultBit-InputBit)); NewVal->takeName(I); - return InsertNewInstBefore(NewVal, *I); + return InsertNewInstWith(NewVal, *I); } // TODO: Could compute known zero/one bits based on the input. break; } + case Intrinsic::x86_sse42_crc32_64_8: + case Intrinsic::x86_sse42_crc32_64_64: + KnownZero = APInt::getHighBitsSet(64, 32); + return 0; } } ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); @@ -784,17 +823,17 @@ UndefElts = 0; if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { - const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; for (unsigned i = 0; i != VWidth; ++i) if (!DemandedElts[i]) { // If not demanded, set to undef. 
Elts.push_back(Undef); - UndefElts.set(i); + UndefElts.setBit(i); } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef. Elts.push_back(Undef); - UndefElts.set(i); + UndefElts.setBit(i); } else { // Otherwise, defined. Elts.push_back(CV->getOperand(i)); } @@ -813,7 +852,7 @@ if (DemandedElts.isAllOnesValue()) return 0; - const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Zero = Constant::getNullValue(EltTy); Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; @@ -829,7 +868,7 @@ if (Depth == 10) return 0; - // If multiple users are using the root value, procede with + // If multiple users are using the root value, proceed with // simplification conservatively assuming that all elements // are needed. if (!V->hasOneUse()) { @@ -877,13 +916,13 @@ // Otherwise, the element inserted overwrites whatever was there, so the // input demanded set is simpler than the output set. APInt DemandedElts2 = DemandedElts; - DemandedElts2.clear(IdxNo); + DemandedElts2.clearBit(IdxNo); TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, UndefElts, Depth+1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } // The inserted element is defined. 
- UndefElts.clear(IdxNo); + UndefElts.clearBit(IdxNo); break; } case Instruction::ShuffleVector: { @@ -898,9 +937,9 @@ assert(MaskVal < LHSVWidth * 2 && "shufflevector mask index out of range!"); if (MaskVal < LHSVWidth) - LeftDemanded.set(MaskVal); + LeftDemanded.setBit(MaskVal); else - RightDemanded.set(MaskVal - LHSVWidth); + RightDemanded.setBit(MaskVal - LHSVWidth); } } } @@ -919,16 +958,19 @@ for (unsigned i = 0; i < VWidth; i++) { unsigned MaskVal = Shuffle->getMaskValue(i); if (MaskVal == -1u) { - UndefElts.set(i); + UndefElts.setBit(i); + } else if (!DemandedElts[i]) { + NewUndefElts = true; + UndefElts.setBit(i); } else if (MaskVal < LHSVWidth) { if (UndefElts4[MaskVal]) { NewUndefElts = true; - UndefElts.set(i); + UndefElts.setBit(i); } } else { if (UndefElts3[MaskVal - LHSVWidth]) { NewUndefElts = true; - UndefElts.set(i); + UndefElts.setBit(i); } } } @@ -950,7 +992,7 @@ } case Instruction::BitCast: { // Vector->vector casts only. - const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); + VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); if (!VTy) break; unsigned InVWidth = VTy->getNumElements(); APInt InputDemandedElts(InVWidth, 0); @@ -971,7 +1013,7 @@ Ratio = VWidth/InVWidth; for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { if (DemandedElts[OutIdx]) - InputDemandedElts.set(OutIdx/Ratio); + InputDemandedElts.setBit(OutIdx/Ratio); } } else { // Untested so far. @@ -983,7 +1025,7 @@ Ratio = InVWidth/VWidth; for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) if (DemandedElts[InIdx/Ratio]) - InputDemandedElts.set(InIdx); + InputDemandedElts.setBit(InIdx); } // div/rem demand all inputs, because they don't want divide by zero. @@ -1002,7 +1044,7 @@ // undef. 
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) if (UndefElts2[OutIdx/Ratio]) - UndefElts.set(OutIdx); + UndefElts.setBit(OutIdx); } else if (VWidth < InVWidth) { llvm_unreachable("Unimp"); // If there are more elements in the source than there are in the result, @@ -1011,7 +1053,7 @@ UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) if (!UndefElts2[InIdx]) // Not undef? - UndefElts.clear(InIdx/Ratio); // Clear undef bit. + UndefElts.clearBit(InIdx/Ratio); // Clear undef bit. } break; } @@ -1070,28 +1112,31 @@ Value *LHS = II->getArgOperand(0); Value *RHS = II->getArgOperand(1); // Extract the element as scalars. - LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, + LHS = InsertNewInstWith(ExtractElementInst::Create(LHS, ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); - RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS, + RHS = InsertNewInstWith(ExtractElementInst::Create(RHS, ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); switch (II->getIntrinsicID()) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_sse_sub_ss: case Intrinsic::x86_sse2_sub_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS), *II); + TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS, + II->getName()), *II); break; case Intrinsic::x86_sse_mul_ss: case Intrinsic::x86_sse2_mul_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS), *II); + TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS, + II->getName()), *II); break; } Instruction *New = InsertElementInst::Create( UndefValue::get(II->getType()), TmpV, - ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false)); - InsertNewInstBefore(New, *II); + ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false), + II->getName()); + InsertNewInstWith(New, *II); return New; } }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/src/LLVM/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 32ffa55..154267c 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -18,7 +18,7 @@ /// CheapToScalarize - Return true if the value is cheaper to scalarize than it /// is to leave as a vector operation. static bool CheapToScalarize(Value *V, bool isConstant) { - if (isa<ConstantAggregateZero>(V)) + if (isa<ConstantAggregateZero>(V)) return true; if (ConstantVector *C = dyn_cast<ConstantVector>(V)) { if (isConstant) return true; @@ -31,7 +31,7 @@ } Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // Insert element gets simplified to the inserted element or is deleted if // this is constant idx extract element and its a constant idx insertelt. if (I->getOpcode() == Instruction::InsertElement && isConstant && @@ -49,26 +49,24 @@ (CheapToScalarize(CI->getOperand(0), isConstant) || CheapToScalarize(CI->getOperand(1), isConstant))) return true; - + return false; } -/// Read and decode a shufflevector mask. -/// -/// It turns undef elements into values that are larger than the number of -/// elements in the input. -static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) { +/// getShuffleMask - Read and decode a shufflevector mask. +/// Turn undef elements into negative values. +static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) { unsigned NElts = SVI->getType()->getNumElements(); if (isa<ConstantAggregateZero>(SVI->getOperand(2))) - return std::vector<unsigned>(NElts, 0); + return std::vector<int>(NElts, 0); if (isa<UndefValue>(SVI->getOperand(2))) - return std::vector<unsigned>(NElts, 2*NElts); - - std::vector<unsigned> Result; + return std::vector<int>(NElts, -1); + + std::vector<int> Result; const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2)); for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i) if (isa<UndefValue>(*i)) - Result.push_back(NElts*2); // undef -> 8 + Result.push_back(-1); // undef else Result.push_back(cast<ConstantInt>(*i)->getZExtValue()); return Result; @@ -79,46 +77,45 @@ /// extracted from the vector. 
static Value *FindScalarElement(Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); - const VectorType *PTy = cast<VectorType>(V->getType()); + VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. return UndefValue::get(PTy->getElementType()); - + if (isa<UndefValue>(V)) return UndefValue::get(PTy->getElementType()); if (isa<ConstantAggregateZero>(V)) return Constant::getNullValue(PTy->getElementType()); if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) return CP->getOperand(EltNo); - + if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. - if (!isa<ConstantInt>(III->getOperand(2))) + if (!isa<ConstantInt>(III->getOperand(2))) return 0; unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); - + // If this is an insert to the element we are looking for, return the // inserted value. - if (EltNo == IIElt) + if (EltNo == IIElt) return III->getOperand(1); - + // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. return FindScalarElement(III->getOperand(0), EltNo); } - + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { unsigned LHSWidth = - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); - unsigned InEl = getShuffleMask(SVI)[EltNo]; - if (InEl < LHSWidth) - return FindScalarElement(SVI->getOperand(0), InEl); - else if (InEl < LHSWidth*2) - return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); - else + cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); + int InEl = getShuffleMask(SVI)[EltNo]; + if (InEl < 0) return UndefValue::get(PTy->getElementType()); + if (InEl < (int)LHSWidth) + return FindScalarElement(SVI->getOperand(0), InEl); + return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); } - + // Otherwise, we don't know. 
return 0; } @@ -127,11 +124,11 @@ // If vector val is undef, replace extract with scalar undef. if (isa<UndefValue>(EI.getOperand(0))) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - + // If vector val is constant 0, replace extract with scalar 0. if (isa<ConstantAggregateZero>(EI.getOperand(0))) return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); - + if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) { // If vector val is constant with all elements the same, replace EI with // that element. When the elements are not identical, we cannot replace yet @@ -139,53 +136,53 @@ Constant *op0 = C->getOperand(0); for (unsigned i = 1; i != C->getNumOperands(); ++i) if (C->getOperand(i) != op0) { - op0 = 0; + op0 = 0; break; } if (op0) return ReplaceInstUsesWith(EI, op0); } - + // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { unsigned IndexVal = IdxC->getZExtValue(); unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - + // If this is extracting an invalid index, turn this into undef, to avoid // crashing the code below. if (IndexVal >= VectorWidth) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - + // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use // property. 
if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { APInt UndefElts(VectorWidth, 0); APInt DemandedMask(VectorWidth, 0); - DemandedMask.set(IndexVal); + DemandedMask.setBit(IndexVal); if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, UndefElts)) { EI.setOperand(0, V); return &EI; } } - + if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) return ReplaceInstUsesWith(EI, Elt); - + // If the this extractelement is directly using a bitcast from a vector of // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { - if (const VectorType *VT = + if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); } } - + if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { // Push extractelement into predecessor operation if legal and // profitable to do so @@ -193,9 +190,11 @@ if (I->hasOneUse() && CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { Value *newEI0 = - Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1)); + Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); Value *newEI1 = - Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1)); + Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); } } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { @@ -213,66 +212,75 @@ // If this is extracting an element from a shufflevector, figure out where // it came from and extract from the appropriate input element instead. 
if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { - unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; + int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; Value *Src; unsigned LHSWidth = - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); - - if (SrcIdx < LHSWidth) + cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); + + if (SrcIdx < 0) + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + if (SrcIdx < (int)LHSWidth) Src = SVI->getOperand(0); - else if (SrcIdx < LHSWidth*2) { + else { SrcIdx -= LHSWidth; Src = SVI->getOperand(1); - } else { - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); } + Type *Int32Ty = Type::getInt32Ty(EI.getContext()); return ExtractElementInst::Create(Src, - ConstantInt::get(Type::getInt32Ty(EI.getContext()), + ConstantInt::get(Int32Ty, SrcIdx, false)); } + } else if (CastInst *CI = dyn_cast<CastInst>(I)) { + // Canonicalize extractelement(cast) -> cast(extractelement) + // bitcasts can change the number of vector elements and they cost nothing + if (CI->hasOneUse() && EI.hasOneUse() && + (CI->getOpcode() != Instruction::BitCast)) { + Value *EE = Builder->CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); + return CastInst::Create(CI->getOpcode(), EE, EI.getType()); + } } - // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) } return 0; } /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns -/// elements from either LHS or RHS, return the shuffle mask and true. +/// elements from either LHS or RHS, return the shuffle mask and true. /// Otherwise, return false. 
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, std::vector<Constant*> &Mask) { assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); - + if (isa<UndefValue>(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return true; } - + if (V == LHS) { for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); return true; } - + if (V == RHS) { for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i+NumElts)); return true; } - + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. Value *VecOp = IEI->getOperand(0); Value *ScalarOp = IEI->getOperand(1); Value *IdxOp = IEI->getOperand(2); - + if (!isa<ConstantInt>(IdxOp)) return false; unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - + if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. // Okay, we can handle this if the vector we are insertinting into is // transitively ok. @@ -280,13 +288,13 @@ // If so, update the mask to reflect the inserted undef. Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); return true; - } + } } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ if (isa<ConstantInt>(EI->getOperand(1)) && EI->getOperand(0)->getType() == V->getType()) { unsigned ExtractedIdx = cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); - + // This must be extracting from either LHS or RHS. if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { // Okay, we can handle this if the vector we are insertinting into is @@ -294,15 +302,14 @@ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted value. 
if (EI->getOperand(0) == LHS) { - Mask[InsertedIdx % NumElts] = + Mask[InsertedIdx % NumElts] = ConstantInt::get(Type::getInt32Ty(V->getContext()), ExtractedIdx); } else { assert(EI->getOperand(0) == RHS); - Mask[InsertedIdx % NumElts] = + Mask[InsertedIdx % NumElts] = ConstantInt::get(Type::getInt32Ty(V->getContext()), ExtractedIdx+NumElts); - } return true; } @@ -311,7 +318,7 @@ } } // TODO: Handle shufflevector here! - + return false; } @@ -320,11 +327,11 @@ /// that computes V and the LHS value of the shuffle. static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, Value *&RHS) { - assert(V->getType()->isVectorTy() && + assert(V->getType()->isVectorTy() && (RHS == 0 || V->getType() == RHS->getType()) && "Invalid shuffle!"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); - + if (isa<UndefValue>(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return V; @@ -336,25 +343,25 @@ Value *VecOp = IEI->getOperand(0); Value *ScalarOp = IEI->getOperand(1); Value *IdxOp = IEI->getOperand(2); - + if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && EI->getOperand(0)->getType() == V->getType()) { unsigned ExtractedIdx = - cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - + // Either the extracted from or inserted into vector must be RHSVec, // otherwise we'd end up with a shuffle of three inputs. 
if (EI->getOperand(0) == RHS || RHS == 0) { RHS = EI->getOperand(0); Value *V = CollectShuffleElements(VecOp, Mask, RHS); - Mask[InsertedIdx % NumElts] = - ConstantInt::get(Type::getInt32Ty(V->getContext()), - NumElts+ExtractedIdx); + Mask[InsertedIdx % NumElts] = + ConstantInt::get(Type::getInt32Ty(V->getContext()), + NumElts+ExtractedIdx); return V; } - + if (VecOp == RHS) { Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); // Everything but the extracted element is replaced with the RHS. @@ -365,7 +372,7 @@ } return V; } - + // If this insertelement is a chain that comes from exactly these two // vectors, return the vector and the effective shuffle. if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) @@ -374,7 +381,7 @@ } } // TODO: Handle shufflevector here! - + // Otherwise, can't do anything fancy. Return an identity vector. for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); @@ -385,32 +392,32 @@ Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); Value *IdxOp = IE.getOperand(2); - + // Inserting an undef or into an undefined place, remove this. if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) ReplaceInstUsesWith(IE, VecOp); - - // If the inserted element was extracted from some other vector, and if the + + // If the inserted element was extracted from some other vector, and if the // indexes are constant, try to turn this into a shufflevector operation. 
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && EI->getOperand(0)->getType() == IE.getType()) { unsigned NumVectorElts = IE.getType()->getNumElements(); unsigned ExtractedIdx = - cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - + if (ExtractedIdx >= NumVectorElts) // Out of range extract. return ReplaceInstUsesWith(IE, VecOp); - + if (InsertedIdx >= NumVectorElts) // Out of range insert. return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); - + // If we are extracting a value from a vector, then inserting it right // back into the same place, just use the input vector. if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) - return ReplaceInstUsesWith(IE, VecOp); - + return ReplaceInstUsesWith(IE, VecOp); + // If this insertelement isn't used by some other insertelement, turn it // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { @@ -419,18 +426,20 @@ Value *LHS = CollectShuffleElements(&IE, Mask, RHS); if (RHS == 0) RHS = UndefValue::get(LHS->getType()); // We now have a shuffle of LHS, RHS, Mask. 
- return new ShuffleVectorInst(LHS, RHS, - ConstantVector::get(Mask)); + return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask)); } } } - + unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) + if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { + if (V != &IE) + return ReplaceInstUsesWith(IE, V); return &IE; - + } + return 0; } @@ -438,40 +447,46 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); - std::vector<unsigned> Mask = getShuffleMask(&SVI); - + std::vector<int> Mask = getShuffleMask(&SVI); + bool MadeChange = false; - + // Undefined shuffle mask -> undefined value. if (isa<UndefValue>(SVI.getOperand(2))) return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); - - unsigned VWidth = Mask.size(); - unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); - + + unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); + + if (VWidth != cast<VectorType>(LHS->getType())->getNumElements()) + return 0; + APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + if (V != &SVI) + return ReplaceInstUsesWith(SVI, V); LHS = SVI.getOperand(0); RHS = SVI.getOperand(1); MadeChange = true; } - + // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). if (LHS == RHS || isa<UndefValue>(LHS)) { - if (isa<UndefValue>(LHS) && LHS == RHS) - return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); - + if (isa<UndefValue>(LHS) && LHS == RHS) { + // shuffle(undef,undef,mask) -> undef. 
+ return ReplaceInstUsesWith(SVI, LHS); + } + // Remap any references to RHS to use LHS. std::vector<Constant*> Elts; - for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { - if (Mask[i] >= 2*e) + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); else { - if ((Mask[i] >= e && isa<UndefValue>(RHS)) || - (Mask[i] < e && isa<UndefValue>(LHS))) { - Mask[i] = 2*e; // Turn into undef. + if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || + (Mask[i] < (int)e && isa<UndefValue>(LHS))) { + Mask[i] = -1; // Turn into undef. Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); } else { Mask[i] = Mask[i] % e; // Force to LHS. @@ -487,150 +502,74 @@ RHS = SVI.getOperand(1); MadeChange = true; } - + // Analyze the shuffle, are the LHS or RHS and identity shuffles? - if (VWidth == LHSWidth) { - bool isLHSID = true, isRHSID = true; - - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] >= e*2) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == i); - - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); - } - - // Eliminate identity shuffles. - if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); - if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); - } - - // Check for a handful of important shuffle(shuffle()) combinations. - ShuffleVectorInst *LSVI = dyn_cast<ShuffleVectorInst>(LHS); - if (!LSVI) - return MadeChange ? 
&SVI : 0; + bool isLHSID = true, isRHSID = true; - LHS = LSVI->getOperand(0); - std::vector<unsigned> LHSMask = getShuffleMask(LSVI); - unsigned LHSInNElts = cast<VectorType>(LHS->getType())->getNumElements(); - - // If lhs is identity, propagate - bool isLHSLoExtract = true, isLHSHiExtract = true; - for (unsigned i = 0, e = LHSMask.size(); i != e; ++i) { - if (LHSMask[i] >= LHSInNElts*2) continue; // Ignore undef values; - isLHSLoExtract &= (LHSMask[i] == i); - isLHSHiExtract &= (LHSMask[i] == i+(LHSInNElts/2)); - } - if ((isLHSLoExtract || isLHSHiExtract) && - (isa<UndefValue>(RHS) || (LHSWidth == LHSInNElts))) { - std::vector<Constant*> Elts; - for (unsigned i = 0, e = VWidth; i != e; ++i) { - if (Mask[i] >= 2*LHSWidth) - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - LHSMask[Mask[i]])); - } - if (isa<UndefValue>(RHS)) - RHS = UndefValue::get(LHS->getType()); - return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Elts)); - } - - // If svi + lhs forms a full unpack, merge it. This allows llvm to emit - // efficient code for matrix transposes written with generic vector ops. - if ((LHSMask.size() == Mask.size()) && isPowerOf2_32(Mask.size()) && - (Mask.size() > 1)) { - bool isUnpackLo = true, isUnpackHi = true; - // check lhs mask for <0, u, 1, u .. 
>; - for (unsigned i = 0, e = LHSMask.size(); i != e; ++i) { - if (LHSMask[i] >= 2*e) continue; - isUnpackLo &= (LHSMask[i] == (i/2)); - isUnpackHi &= (LHSMask[i] == (i/2) + (e/2)); - } - for (unsigned i = 0, e = Mask.size(); i != e && (isUnpackLo || isUnpackHi); - i += 2) { - isUnpackLo &= (Mask[i] == i) && (Mask[i+1] == (i/2)+e); - isUnpackHi &= (Mask[i] == i) && (Mask[i+1] == (i/2)+e+(e/2)); - } - if (isUnpackLo || isUnpackHi) { - std::vector<Constant*> Elts; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] >= 2*e) - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - else if (Mask[i] >= e) - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - Mask[i])); - else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - LHSMask[Mask[i]])); - } - return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Elts)); - } + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) continue; // Ignore undef values. + // Is this an identity shuffle of the LHS value? + isLHSID &= (Mask[i] == (int)i); + + // Is this an identity shuffle of the RHS value? + isRHSID &= (Mask[i]-e == i); } - // If rhs is shuffle + identity, propagate. - if (ShuffleVectorInst *RSVI = dyn_cast<ShuffleVectorInst>(RHS)) { - std::vector<unsigned> RHSMask = getShuffleMask(RSVI); - unsigned RHSInNElts = - cast<VectorType>(RSVI->getOperand(0)->getType())->getNumElements(); + // Eliminate identity shuffles. 
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); + if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); - // If rhs is identity, propagate - bool isRHSLoExtract = true, isRHSHiExtract = true; - for (unsigned i = 0, e = RHSMask.size(); i != e; ++i) { - if (RHSMask[i] >= RHSInNElts*2) continue; // Ignore undef values; - isRHSLoExtract &= (RHSMask[i] == i); - isRHSHiExtract &= (RHSMask[i] == i+(RHSInNElts/2)); - } - if ((isRHSLoExtract || isRHSHiExtract) && (LHSWidth == RHSInNElts)) { - std::vector<Constant*> Elts; - for (unsigned i = 0, e = VWidth; i != e; ++i) { - if (Mask[i] >= 2*LHSWidth) - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - else if (Mask[i] < LHSWidth) - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - Mask[i])); - else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - RHSMask[Mask[i]-LHSWidth]+LHSWidth)); - } - SVI.setOperand(1, RSVI->getOperand(0)); - SVI.setOperand(2, ConstantVector::get(Elts)); - return &SVI; - } - } - - // Be extremely conservative when merging shufflevector instructions. It is - // difficult for the code generator to recognize a merged shuffle, which - // usually leads to worse code from merging a shuffle. - if (!isa<UndefValue>(RHS)) - return MadeChange ? &SVI : 0; - - // If the merged shuffle mask is one of the two input shuffle masks, which - // just removes one instruction. This should handle splat(splat) -> splat. - if (LHSMask.size() == Mask.size()) { - std::vector<unsigned> NewMask; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) - if (Mask[i] >= e) - NewMask.push_back(2*e); - else - NewMask.push_back(LHSMask[Mask[i]]); - - // If the result mask is equal to the src shuffle or this shuffle mask, - // do the replacement. 
- if (NewMask == LHSMask || NewMask == Mask) { - std::vector<Constant*> Elts; - for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - } else { - Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - NewMask[i])); + // If the LHS is a shufflevector itself, see if we can combine it with this + // one without producing an unusual shuffle. Here we are really conservative: + // we are absolutely afraid of producing a shuffle mask not in the input + // program, because the code gen may not be smart enough to turn a merged + // shuffle into two specific shuffles: it may produce worse code. As such, + // we only merge two shuffles if the result is either a splat or one of the + // two input shuffle masks. In this case, merging the shuffles just removes + // one instruction, which we know is safe. This is good for things like + // turning: (splat(splat)) -> splat. + if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) { + if (isa<UndefValue>(RHS)) { + std::vector<int> LHSMask = getShuffleMask(LHSSVI); + + if (LHSMask.size() == Mask.size()) { + std::vector<int> NewMask; + bool isSplat = true; + int SplatElt = -1; // undef + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + int MaskElt; + if (Mask[i] < 0 || Mask[i] >= (int)e) + MaskElt = -1; // undef + else + MaskElt = LHSMask[Mask[i]]; + // Check if this could still be a splat. + if (MaskElt >= 0) { + if (SplatElt >=0 && SplatElt != MaskElt) + isSplat = false; + SplatElt = MaskElt; + } + NewMask.push_back(MaskElt); + } + + // If the result mask is equal to the src shuffle or this + // shuffle mask, do the replacement. 
+ if (isSplat || NewMask == LHSMask || NewMask == Mask) { + std::vector<Constant*> Elts; + Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); + for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { + if (NewMask[i] < 0) { + Elts.push_back(UndefValue::get(Int32Ty)); + } else { + Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i])); + } + } + return new ShuffleVectorInst(LHSSVI->getOperand(0), + LHSSVI->getOperand(1), + ConstantVector::get(Elts)); } } - return new ShuffleVectorInst(LHS, LSVI->getOperand(1), - ConstantVector::get(Elts)); } } + return MadeChange ? &SVI : 0; }
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstCombineWorklist.h b/src/LLVM/lib/Transforms/InstCombine/InstCombineWorklist.h index 9100a85..32009c3 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/src/LLVM/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -53,6 +53,7 @@ void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { assert(Worklist.empty() && "Worklist must be empty to add initial group"); Worklist.reserve(NumEntries+16); + WorklistMap.resize(NumEntries); DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); for (; NumEntries; --NumEntries) { Instruction *I = List[NumEntries-1];
diff --git a/src/LLVM/lib/Transforms/InstCombine/InstructionCombining.cpp b/src/LLVM/lib/Transforms/InstCombine/InstructionCombining.cpp index 72f6b5c..c15b805 100644 --- a/src/LLVM/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/src/LLVM/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -39,14 +39,18 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm-c/Initialization.h" #include <algorithm> #include <climits> using namespace llvm; @@ -56,14 +60,24 @@ STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); STATISTIC(NumSunkInst , "Number of instructions sunk"); +STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumFactor , "Number of factorizations"); +STATISTIC(NumReassoc , "Number of reassociations"); +// Initialization Routines +void llvm::initializeInstCombine(PassRegistry &Registry) { + initializeInstCombinerPass(Registry); +} + +void LLVMInitializeInstCombine(LLVMPassRegistryRef R) { + initializeInstCombine(*unwrap(R)); +} char InstCombiner::ID = 0; INITIALIZE_PASS(InstCombiner, "instcombine", - "Combine redundant instructions", false, false); + "Combine redundant instructions", false, false) void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreservedID(LCSSAID); AU.setPreservesCFG(); } @@ -71,7 +85,7 @@ /// ShouldChangeType - Return true if it is desirable to convert a computation /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. 
-bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { +bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { assert(From->isIntegerTy() && To->isIntegerTy()); // If we don't have TD, we don't know if the source/dest are legal. @@ -95,54 +109,374 @@ return true; } +// Return true, if No Signed Wrap should be maintained for I. +// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", +// where both B and C should be ConstantInts, results in a constant that does +// not overflow. This function only handles the Add and Sub opcodes. For +// all other opcodes, the function conservatively returns false. +static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { + OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I); + if (!OBO || !OBO->hasNoSignedWrap()) { + return false; + } -// SimplifyCommutative - This performs a few simplifications for commutative -// operators: + // We reason about Add and Sub Only. + Instruction::BinaryOps Opcode = I.getOpcode(); + if (Opcode != Instruction::Add && + Opcode != Instruction::Sub) { + return false; + } + + ConstantInt *CB = dyn_cast<ConstantInt>(B); + ConstantInt *CC = dyn_cast<ConstantInt>(C); + + if (!CB || !CC) { + return false; + } + + const APInt &BVal = CB->getValue(); + const APInt &CVal = CC->getValue(); + bool Overflow = false; + + if (Opcode == Instruction::Add) { + BVal.sadd_ov(CVal, Overflow); + } else { + BVal.ssub_ov(CVal, Overflow); + } + + return !Overflow; +} + +/// SimplifyAssociativeOrCommutative - This performs a few simplifications for +/// operators which are associative or commutative: +// +// Commutative operators: // // 1. Order operands such that they are listed from right (least complex) to // left (most complex). This puts constants before unary operators before // binary operators. // -// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) -// 3. 
Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) +// Associative operators: // -bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { - bool Changed = false; - if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) - Changed = !I.swapOperands(); - - if (!I.isAssociative()) return Changed; - +// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. +// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. +// +// Associative and commutative operators: +// +// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. +// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. +// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" +// if C1 and C2 are constants. +// +bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Instruction::BinaryOps Opcode = I.getOpcode(); - if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) - if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { - if (isa<Constant>(I.getOperand(1))) { - Constant *Folded = ConstantExpr::get(I.getOpcode(), - cast<Constant>(I.getOperand(1)), - cast<Constant>(Op->getOperand(1))); - I.setOperand(0, Op->getOperand(0)); - I.setOperand(1, Folded); - return true; - } - - if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1))) - if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) && - Op->hasOneUse() && Op1->hasOneUse()) { - Constant *C1 = cast<Constant>(Op->getOperand(1)); - Constant *C2 = cast<Constant>(Op1->getOperand(1)); + bool Changed = false; - // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); - Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), - Op1->getOperand(0), - &I); - Worklist.Add(New); - I.setOperand(0, New); - I.setOperand(1, Folded); - return true; + do { + // Order operands such that 
they are listed from right (least complex) to + // left (most complex). This puts constants before unary operators before + // binary operators. + if (I.isCommutative() && getComplexity(I.getOperand(0)) < + getComplexity(I.getOperand(1))) + Changed = !I.swapOperands(); + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0)); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)); + + if (I.isAssociative()) { + // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = I.getOperand(1); + + // Does "B op C" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) { + // It simplifies to V. Form "A op V". + I.setOperand(0, A); + I.setOperand(1, V); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + if (MaintainNoSignedWrap(I, B, C) && + (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { + // Note: this is only valid because SimplifyBinOp doesn't look at + // the operands to Op0. + I.clearSubclassOptionalData(); + I.setHasNoSignedWrap(true); + } else { + I.clearSubclassOptionalData(); + } + + Changed = true; + ++NumReassoc; + continue; } + } + + // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = I.getOperand(0); + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "A op B" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) { + // It simplifies to V. Form "V op C". + I.setOperand(0, V); + I.setOperand(1, C); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. 
+ I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; + } + } } - return Changed; + + if (I.isAssociative() && I.isCommutative()) { + // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = I.getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + // It simplifies to V. Form "V op B". + I.setOperand(0, V); + I.setOperand(1, B); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; + } + } + + // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = I.getOperand(0); + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) { + // It simplifies to V. Form "B op V". + I.setOperand(0, B); + I.setOperand(1, V); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + Changed = true; + ++NumReassoc; + continue; + } + } + + // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" + // if C1 and C2 are constants. 
+ if (Op0 && Op1 && + Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && + isa<Constant>(Op0->getOperand(1)) && + isa<Constant>(Op1->getOperand(1)) && + Op0->hasOneUse() && Op1->hasOneUse()) { + Value *A = Op0->getOperand(0); + Constant *C1 = cast<Constant>(Op0->getOperand(1)); + Value *B = Op1->getOperand(0); + Constant *C2 = cast<Constant>(Op1->getOperand(1)); + + Constant *Folded = ConstantExpr::get(Opcode, C1, C2); + BinaryOperator *New = BinaryOperator::Create(Opcode, A, B); + InsertNewInstWith(New, I); + New->takeName(Op1); + I.setOperand(0, New); + I.setOperand(1, Folded); + // Conservatively clear the optional flags, since they may not be + // preserved by the reassociation. + I.clearSubclassOptionalData(); + + Changed = true; + continue; + } + } + + // No further simplifications. + return Changed; + } while (1); +} + +/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to +/// "(X LOp Y) ROp (X LOp Z)". +static bool LeftDistributesOverRight(Instruction::BinaryOps LOp, + Instruction::BinaryOps ROp) { + switch (LOp) { + default: + return false; + + case Instruction::And: + // And distributes over Or and Xor. + switch (ROp) { + default: + return false; + case Instruction::Or: + case Instruction::Xor: + return true; + } + + case Instruction::Mul: + // Multiplication distributes over addition and subtraction. + switch (ROp) { + default: + return false; + case Instruction::Add: + case Instruction::Sub: + return true; + } + + case Instruction::Or: + // Or distributes over And. + switch (ROp) { + default: + return false; + case Instruction::And: + return true; + } + } +} + +/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to +/// "(X ROp Z) LOp (Y ROp Z)". 
+static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, + Instruction::BinaryOps ROp) { + if (Instruction::isCommutative(ROp)) + return LeftDistributesOverRight(ROp, LOp); + // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", + // but this requires knowing that the addition does not overflow and other + // such subtleties. + return false; +} + +/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations +/// which some other binary operation distributes over either by factorizing +/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this +/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is +/// a win). Returns the simplified value, or null if it didn't simplify. +Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op + + // Factorization. + if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) { + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); + Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); + Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + + // Does "X op' Y" always equal "Y op' X"? + bool InnerCommutative = Instruction::isCommutative(InnerOpcode); + + // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? + if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (InnerCommutative && A == D)) { + if (A != C) + std::swap(C, D); + // Consider forming "A op' (B op D)". + // If "B op D" simplifies then it can be formed with no cost. 
+ Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD); + // If "B op D" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && Op0->hasOneUse() && Op1->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName()); + if (V) { + ++NumFactor; + V = Builder->CreateBinOp(InnerOpcode, A, V); + V->takeName(&I); + return V; + } + } + + // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? + if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (InnerCommutative && B == C)) { + if (B != D) + std::swap(C, D); + // Consider forming "(A op C) op' B". + // If "A op C" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD); + // If "A op C" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && Op0->hasOneUse() && Op1->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName()); + if (V) { + ++NumFactor; + V = Builder->CreateBinOp(InnerOpcode, V, B); + V->takeName(&I); + return V; + } + } + } + + // Expansion. + if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { + // The instruction has the form "(A op' B) op C". See if expanding it out + // to "(A op C) op' (B op C)" results in simplifications. + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; + Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + + // Do "A op C" and "B op C" both simplify? + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) { + // They do! Return "L op' R". + ++NumExpand; + // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. 
+ if ((L == A && R == B) || + (Instruction::isCommutative(InnerOpcode) && L == B && R == A)) + return Op0; + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + return V; + // Otherwise, create a new instruction. + C = Builder->CreateBinOp(InnerOpcode, L, R); + C->takeName(&I); + return C; + } + } + + if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) { + // The instruction has the form "A op (B op' C)". See if expanding it out + // to "(A op B) op' (A op C)" results in simplifications. + Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); + Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' + + // Do "A op B" and "A op C" both simplify? + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) { + // They do! Return "L op' R". + ++NumExpand; + // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. + if ((L == B && R == C) || + (Instruction::isCommutative(InnerOpcode) && L == C && R == B)) + return Op1; + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD)) + return V; + // Otherwise, create a new instruction. + A = Builder->CreateBinOp(InnerOpcode, L, R); + A->takeName(&I); + return A; + } + } + + return 0; } // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction @@ -184,8 +518,9 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner *IC) { - if (CastInst *CI = dyn_cast<CastInst>(&I)) + if (CastInst *CI = dyn_cast<CastInst>(&I)) { return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); + } // Figure out if the constant is the left or the right argument. 
bool ConstIsRHS = isa<Constant>(I.getOperand(1)); @@ -202,11 +537,14 @@ std::swap(Op0, Op1); if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) - return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1); + return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, + SO->getName()+".op"); if (ICmpInst *CI = dyn_cast<ICmpInst>(&I)) - return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1); + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); if (FCmpInst *CI = dyn_cast<FCmpInst>(&I)) - return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1); + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); llvm_unreachable("Unknown binary instruction type!"); } @@ -224,11 +562,24 @@ // Bool selects with constant operands can be folded to logical ops. if (SI->getType()->isIntegerTy(1)) return 0; + // If it's a bitcast involving vectors, make sure it has the same number of + // elements on both sides. + if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) { + VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy()); + VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); + + // Verify that either both or neither are vectors. + if ((SrcTy == NULL) != (DestTy == NULL)) return 0; + // If vectors, verify that they have the same number of elements. + if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements()) + return 0; + } + Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); - return SelectInst::Create(SI->getCondition(), SelectTrueVal, - SelectFalseVal); + return SelectInst::Create(SI->getCondition(), + SelectTrueVal, SelectFalseVal); } return 0; } @@ -238,20 +589,25 @@ /// has a PHI node as operand #0, see if we can fold the instruction into the /// PHI (which is only possible if all operands to the PHI are constants). 
/// -/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms -/// that would normally be unprofitable because they strongly encourage jump -/// threading. -Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, - bool AllowAggressive) { - AllowAggressive = false; +Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *PN = cast<PHINode>(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); - if (NumPHIValues == 0 || - // We normally only transform phis with a single use, unless we're trying - // hard to make jump threading happen. - (!PN->hasOneUse() && !AllowAggressive)) + if (NumPHIValues == 0) return 0; + // We normally only transform phis with a single use. However, if a PHI has + // multiple uses and they are all the same operation, we can fold *all* of the + // uses into the PHI. + if (!PN->hasOneUse()) { + // Walk the use list for the instruction, comparing them to I. + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User != &I && !I.isIdenticalTo(User)) + return 0; + } + // Otherwise, we can replace *all* users with the new PHI we form. + } // Check to see if all of the operands of the PHI are simple constants // (constantint/constantfp/undef). If there is one non-constant value, @@ -259,34 +615,48 @@ // bail out. We don't do arbitrary constant expressions here because moving // their computation can be expensive without a cost model. BasicBlock *NonConstBB = 0; - for (unsigned i = 0; i != NumPHIValues; ++i) - if (!isa<Constant>(PN->getIncomingValue(i)) || - isa<ConstantExpr>(PN->getIncomingValue(i))) { - if (NonConstBB) return 0; // More than one non-const value. - if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi. - NonConstBB = PN->getIncomingBlock(i); - - // If the incoming non-constant value is in I's block, we have an infinite - // loop. 
- if (NonConstBB == I.getParent()) + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InVal = PN->getIncomingValue(i); + if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal)) + continue; + + if (isa<PHINode>(InVal)) return 0; // Itself a phi. + if (NonConstBB) return 0; // More than one non-const value. + + NonConstBB = PN->getIncomingBlock(i); + + // If the InVal is an invoke at the end of the pred block, then we can't + // insert a computation after it without breaking the edge. + if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) + if (II->getParent() == NonConstBB) return 0; - } + + // If the incoming non-constant value is in I's block, we will remove one + // instruction, but insert another equivalent one, leading to infinite + // instcombine. + if (NonConstBB == I.getParent()) + return 0; + } // If there is exactly one non-constant value, we can insert a copy of the // operation in that block. However, if this is a critical edge, we would be // inserting the computation one some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. - if (NonConstBB != 0 && !AllowAggressive) { + if (NonConstBB != 0) { BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); if (!BI || !BI->isUnconditional()) return 0; } // Okay, we can do the transformation: create the new PHI node. - PHINode *NewPN = PHINode::Create(I.getType()); - NewPN->reserveOperandSpace(PN->getNumOperands()/2); + PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues()); InsertNewInstBefore(NewPN, *PN); NewPN->takeName(PN); - + + // If we are going to have to insert a new computation, do so right before the + // predecessors terminator. + if (NonConstBB) + Builder->SetInsertPoint(NonConstBB->getTerminator()); + // Next, add all of the operands to the PHI. 
if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { // We only currently try to fold the condition of a select when it is a phi, @@ -299,61 +669,59 @@ Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); Value *InV = 0; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, - FalseVInPred, - NonConstBB->getTerminator()); - Worklist.Add(cast<Instruction>(InV)); - } + else + InV = Builder->CreateSelect(PN->getIncomingValue(i), + TrueVInPred, FalseVInPred, "phitmp"); NewPN->addIncoming(InV, ThisBB); } + } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) { + Constant *C = cast<Constant>(I.getOperand(1)); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV = 0; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); + else if (isa<ICmpInst>(CI)) + InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); + else + InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } } else if (I.getNumOperands() == 2) { Constant *C = cast<Constant>(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV = 0; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { - if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); - else - InV = ConstantExpr::get(I.getOpcode(), InC, C); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) - InV = BinaryOperator::Create(BO->getOpcode(), - PN->getIncomingValue(i), C, - 
NonConstBB->getTerminator()); - else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = CmpInst::Create(CI->getOpcode(), - CI->getPredicate(), - PN->getIncomingValue(i), C, - NonConstBB->getTerminator()); - else - llvm_unreachable("Unknown binop!"); - - Worklist.Add(cast<Instruction>(InV)); - } + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + InV = ConstantExpr::get(I.getOpcode(), InC, C); + else + InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(), + PN->getIncomingValue(i), C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { CastInst *CI = cast<CastInst>(&I); - const Type *RetTy = CI->getType(); + Type *RetTy = CI->getType(); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), - I.getType(), - NonConstBB->getTerminator()); - Worklist.Add(cast<Instruction>(InV)); - } + else + InV = Builder->CreateCast(CI->getOpcode(), + PN->getIncomingValue(i), I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ) { + Instruction *User = cast<Instruction>(*UI++); + if (User == &I) continue; + ReplaceInstUsesWith(*User, NewPN); + EraseInstFromFunction(*User); + } return ReplaceInstUsesWith(I, NewPN); } @@ -361,7 +729,7 @@ /// or not there is a sequence of GEP indices into the type that will land us at /// the specified offset. If so, fill them into NewIndices and return the /// resultant element type, otherwise return null. 
-const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices) { if (!TD) return 0; if (!Ty->isSized()) return 0; @@ -369,7 +737,7 @@ // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -392,7 +760,7 @@ if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) return 0; - if (const StructType *STy = dyn_cast<StructType>(Ty)) { + if (StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TD->getStructLayout(STy); assert(Offset < (int64_t)SL->getSizeInBytes() && "Offset must stay within the indexed type"); @@ -403,7 +771,7 @@ Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); - } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); @@ -418,38 +786,53 @@ return Ty; } - +static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { + // If this GEP has only 0 indices, it is the same pointer as + // Src. If Src is not a trivial GEP too, don't combine + // the indices. 
+ if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && + !Src.hasOneUse()) + return false; + return true; +} Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD)) + if (Value *V = SimplifyGEPInst(Ops, TD)) return ReplaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); - if (isa<UndefValue>(GEP.getOperand(0))) - return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); - - // Eliminate unneeded casts for indices. + // Eliminate unneeded casts for indices, and replace indices which displace + // by multiples of a zero size type with zero. if (TD) { bool MadeChange = false; - unsigned PtrSize = TD->getPointerSizeInBits(); - + Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); + gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; ++I, ++GTI) { - if (!isa<SequentialType>(*GTI)) continue; - - // If we are using a wider index than needed for this platform, shrink it - // to what we need. If narrower, sign-extend it to what we need. This - // explicit cast can make subsequent optimizations more obvious. - unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth(); - if (OpBits == PtrSize) - continue; - - *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); - MadeChange = true; + // Skip indices into struct types. + SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); + if (!SeqTy) continue; + + // If the element type has zero size then any index over it is equivalent + // to an index of zero, so replace it with zero if it is not zero already. 
+ if (SeqTy->getElementType()->isSized() && + TD->getTypeAllocSize(SeqTy->getElementType()) == 0) + if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) { + *I = Constant::getNullValue(IntPtrTy); + MadeChange = true; + } + + if ((*I)->getType() != IntPtrTy) { + // If we are using a wider index than needed for this platform, shrink + // it to what we need. If narrower, sign-extend it to what we need. + // This explicit cast can make subsequent optimizations more obvious. + *I = Builder->CreateIntCast(*I, IntPtrTy, true); + MadeChange = true; + } } if (MadeChange) return &GEP; } @@ -459,13 +842,15 @@ // getelementptr instructions into a single instruction. // if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { + if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) + return 0; + // Note that if our source is a gep chain itself that we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. - // - if (GetElementPtrInst *SrcGEP = - dyn_cast<GetElementPtrInst>(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2) + if (GEPOperator *SrcGEP = + dyn_cast<GEPOperator>(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) return 0; // Wait until our source is folded to completion. SmallVector<Value*, 8> Indices; @@ -495,7 +880,7 @@ // normalized. if (SO1->getType() != GO1->getType()) return 0; - Sum = Builder->CreateAdd(SO1, GO1); + Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); } // Update the GEP in place if possible. @@ -517,44 +902,43 @@ if (!Indices.empty()) return (GEP.isInBounds() && Src->isInBounds()) ? 
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), - Indices.end()) : - GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), - Indices.end()); + GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices, + GEP.getName()) : + GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); } - + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - if (StrippedPtr != PtrOp) { - const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + if (StrippedPtr != PtrOp && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { bool HasZeroPointerIndex = false; if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1))) HasZeroPointerIndex = C->isZero(); - + // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... // into : GEP [10 x i8]* X, i32 0, ... // // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... // into : GEP i8* X, ... - // + // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { - const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); - if (const ArrayType *CATy = + PointerType *CPTy = cast<PointerType>(PtrOp->getType()); + if (ArrayType *CATy = dyn_cast<ArrayType>(CPTy->getElementType())) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end()); GetElementPtrInst *Res = - GetElementPtrInst::Create(StrippedPtr, Idx.begin(), - Idx.end()); + GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); return Res; } - if (const ArrayType *XATy = + if (ArrayType *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? 
if (CATy->getElementType() == XATy->getElementType()) { @@ -572,8 +956,8 @@ // Transform things like: // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast - const Type *SrcElTy = StrippedPtrTy->getElementType(); - const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); + Type *SrcElTy = StrippedPtrTy->getElementType(); + Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { @@ -581,8 +965,8 @@ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); Idx[1] = GEP.getOperand(1); Value *NewGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2) : - Builder->CreateGEP(StrippedPtr, Idx, Idx + 2); + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return new BitCastInst(NewGEP, GEP.getType()); } @@ -632,7 +1016,7 @@ if (Scale->getZExtValue() != 1) { Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), false /*ZExt*/); - NewIdx = Builder->CreateMul(NewIdx, C); + NewIdx = Builder->CreateMul(NewIdx, C, "idxscale"); } // Insert the new GEP instruction. @@ -640,15 +1024,15 @@ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); Idx[1] = NewIdx; Value *NewGEP = GEP.isInBounds() ? 
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2): - Builder->CreateGEP(StrippedPtr, Idx, Idx + 2); + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()): + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return new BitCastInst(NewGEP, GEP.getType()); } } } } - + /// See if we can simplify: /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> @@ -656,18 +1040,21 @@ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { if (TD && - !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { + !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { + // Determine how much the GEP moves the pointer. We are guaranteed to get // a constant back from EmitGEPOffset. ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP)); int64_t Offset = OffsetV->getSExtValue(); - + // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. if (Offset == 0) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. - if (isa<AllocaInst>(BCI->getOperand(0))) { + if (isa<AllocaInst>(BCI->getOperand(0)) || + isMalloc(BCI->getOperand(0))) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -685,14 +1072,12 @@ // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector<Value*, 8> NewIndices; - const Type *InTy = + Type *InTy = cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); if (FindElementAtOffset(InTy, Offset, NewIndices)) { Value *NGEP = GEP.isInBounds() ? 
- Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()) : - Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()); + Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : + Builder->CreateGEP(BCI->getOperand(0), NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -707,18 +1092,95 @@ -static bool IsOnlyNullComparedAndFreed(const Value &V) { - for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end(); +static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl<WeakVH> &Users, + int Depth = 0) { + if (Depth == 8) + return false; + + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { - const User *U = *UI; - if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U)) - if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) + User *U = *UI; + if (isFreeCall(U)) { + Users.push_back(U); + continue; + } + if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { + if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) { + Users.push_back(ICI); continue; + } + } + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) { + Users.push_back(BCI); + continue; + } + } + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) { + Users.push_back(GEPI); + continue; + } + } + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + Users.push_back(II); + continue; + } + } return false; } return true; } +Instruction *InstCombiner::visitMalloc(Instruction &MI) { + // If we have a malloc call which is only used in any amount of comparisons + // to null and free calls, delete the calls and replace the comparisons with + // true or false as appropriate. 
+ SmallVector<WeakVH, 64> Users; + if (IsOnlyNullComparedAndFreed(&MI, Users)) { + for (unsigned i = 0, e = Users.size(); i != e; ++i) { + Instruction *I = cast_or_null<Instruction>(&*Users[i]); + if (!I) continue; + + if (ICmpInst *C = dyn_cast<ICmpInst>(I)) { + ReplaceInstUsesWith(*C, + ConstantInt::get(Type::getInt1Ty(C->getContext()), + C->isFalseWhenEqual())); + } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { + ReplaceInstUsesWith(*I, UndefValue::get(I->getType())); + } + EraseInstFromFunction(*I); + } + return EraseInstFromFunction(MI); + } + return 0; +} + + + +Instruction *InstCombiner::visitFree(CallInst &FI) { + Value *Op = FI.getArgOperand(0); + + // free undef -> unreachable. + if (isa<UndefValue>(Op)) { + // Insert a new store to null because we cannot modify the CFG here. + Builder->CreateStore(ConstantInt::getTrue(FI.getContext()), + UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); + return EraseInstFromFunction(FI); + } + + // If we have 'free null' delete the instruction. This can happen in stl code + // when lots of inlining happens. + if (isa<ConstantPointerNull>(Op)) + return EraseInstFromFunction(FI); + + return 0; +} + + + Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Change br (not X), label True, label False to: br X, label False, True Value *X = 0; @@ -728,8 +1190,7 @@ !isa<Constant>(X)) { // Swap Destinations and condition... BI.setCondition(X); - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); return &BI; } @@ -744,8 +1205,7 @@ Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); // Swap Destinations and condition. - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); Worklist.Add(Cond); return &BI; } @@ -761,8 +1221,7 @@ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition()); Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); // Swap Destinations and condition. 
- BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); Worklist.Add(Cond); return &BI; } @@ -776,11 +1235,17 @@ if (I->getOpcode() == Instruction::Add) if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { // change 'switch (X+4) case 1:' into 'switch (X) case -3' - for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) - SI.setOperand(i, - ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)), - AddRHS)); - SI.setOperand(0, I->getOperand(0)); + unsigned NumCases = SI.getNumCases(); + // Skip the first item since that's the default case. + for (unsigned i = 1; i < NumCases; ++i) { + ConstantInt* CaseVal = SI.getCaseValue(i); + Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal), + AddRHS); + assert(isa<ConstantInt>(NewCaseVal) && + "Result of expression should be constant"); + SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal)); + } + SI.setCondition(I->getOperand(0)); Worklist.Add(I); return &SI; } @@ -807,7 +1272,7 @@ if (EV.getNumIndices() > 1) // Extract the remaining indices out of the constant indexed by the // first index - return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end()); + return ExtractValueInst::Create(V, EV.getIndices().slice(1)); else return ReplaceInstUsesWith(EV, V); } @@ -830,7 +1295,7 @@ // with // %E = extractvalue { i32, { i32 } } %A, 0 return ExtractValueInst::Create(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); + EV.getIndices()); } if (exti == exte && insi == inse) // Both iterators are at the end: Index lists are identical. Replace @@ -848,9 +1313,9 @@ // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). 
Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); + EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), - insi, inse); + makeArrayRef(insi, inse)); } if (insi == inse) // The insert list is a prefix of the extract list @@ -862,7 +1327,7 @@ // with // %E extractvalue { i32 } { i32 42 }, 0 return ExtractValueInst::Create(IV->getInsertedValueOperand(), - exti, exte); + makeArrayRef(exti, exte)); } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { // We're extracting from an intrinsic, see if we're the only user, which @@ -877,16 +1342,24 @@ case Intrinsic::sadd_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - II->replaceAllUsesWith(UndefValue::get(II->getType())); + ReplaceInstUsesWith(*II, UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateAdd(LHS, RHS); } + + // If the normal result of the add is dead, and the RHS is a constant, + // we can transform this into a range comparison. + // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 + if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow) + if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1))) + return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0), + ConstantExpr::getNot(CI)); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - II->replaceAllUsesWith(UndefValue::get(II->getType())); + ReplaceInstUsesWith(*II, UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateSub(LHS, RHS); } @@ -895,7 +1368,7 @@ case Intrinsic::smul_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. 
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - II->replaceAllUsesWith(UndefValue::get(II->getType())); + ReplaceInstUsesWith(*II, UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateMul(LHS, RHS); } @@ -905,10 +1378,372 @@ } } } - // Can't simplify extracts from other values. Note that nested extracts are - // already simplified implicitely by the above (extract ( extract (insert) ) + if (LoadInst *L = dyn_cast<LoadInst>(Agg)) + // If the (non-volatile) load only has one use, we can rewrite this to a + // load from a GEP. This reduces the size of the load. + // FIXME: If a load is used only by extractvalue instructions then this + // could be done regardless of having multiple uses. + if (L->isSimple() && L->hasOneUse()) { + // extractvalue has integer indices, getelementptr has Value*s. Convert. + SmallVector<Value*, 4> Indices; + // Prefix an i32 0 since we need the first element. + Indices.push_back(Builder->getInt32(0)); + for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); + I != E; ++I) + Indices.push_back(Builder->getInt32(*I)); + + // We need to insert these at the location of the old load, not at that of + // the extractvalue. + Builder->SetInsertPoint(L->getParent(), L); + Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices); + // Returning the load directly will cause the main loop to insert it in + // the wrong spot, so use ReplaceInstUsesWith(). + return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); + } + // We could simplify extracts from other values. Note that nested extracts may + // already be simplified implicitly by the above: extract (extract (insert) ) // will be translated into extract ( insert ( extract ) ) first and then just - // the value inserted, if appropriate). + // the value inserted, if appropriate. 
Similarly for extracts from single-use + // loads: extract (extract (load)) will be translated to extract (load (gep)) + // and if again single-use then via load (gep (gep)) to load (gep). + // However, double extracts from e.g. function arguments or return values + // aren't handled yet. + return 0; +} + +enum Personality_Type { + Unknown_Personality, + GNU_Ada_Personality, + GNU_CXX_Personality +}; + +/// RecognizePersonality - See if the given exception handling personality +/// function is one that we understand. If so, return a description of it; +/// otherwise return Unknown_Personality. +static Personality_Type RecognizePersonality(Value *Pers) { + Function *F = dyn_cast<Function>(Pers->stripPointerCasts()); + if (!F) + return Unknown_Personality; + return StringSwitch<Personality_Type>(F->getName()) + .Case("__gnat_eh_personality", GNU_Ada_Personality) + .Case("__gxx_personality_v0", GNU_CXX_Personality) + .Default(Unknown_Personality); +} + +/// isCatchAll - Return 'true' if the given typeinfo will match anything. +static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) { + switch (Personality) { + case Unknown_Personality: + return false; + case GNU_Ada_Personality: + // While __gnat_all_others_value will match any Ada exception, it doesn't + // match foreign exceptions (or didn't, before gcc-4.7). + return false; + case GNU_CXX_Personality: + return TypeInfo->isNullValue(); + } + llvm_unreachable("Unknown personality!"); +} + +static bool shorter_filter(const Value *LHS, const Value *RHS) { + return + cast<ArrayType>(LHS->getType())->getNumElements() + < + cast<ArrayType>(RHS->getType())->getNumElements(); +} + +Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { + // The logic here should be correct for any real-world personality function. + // However if that turns out not to be true, the offending logic can always + // be conditioned on the personality function, like the catch-all logic is. 
+ Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn()); + + // Simplify the list of clauses, eg by removing repeated catch clauses + // (these are often created by inlining). + bool MakeNewInstruction = false; // If true, recreate using the following: + SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction; + bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. + + SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. + for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { + bool isLastClause = i + 1 == e; + if (LI.isCatch(i)) { + // A catch clause. + Value *CatchClause = LI.getClause(i); + Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts()); + + // If we already saw this clause, there is no point in having a second + // copy of it. + if (AlreadyCaught.insert(TypeInfo)) { + // This catch clause was not already seen. + NewClauses.push_back(CatchClause); + } else { + // Repeated catch clause - drop the redundant copy. + MakeNewInstruction = true; + } + + // If this is a catch-all then there is no point in keeping any following + // clauses or marking the landingpad as having a cleanup. + if (isCatchAll(Personality, TypeInfo)) { + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + } else { + // A filter clause. If any of the filter elements were already caught + // then they can be dropped from the filter. It is tempting to try to + // exploit the filter further by saying that any typeinfo that does not + // occur in the filter can't be caught later (and thus can be dropped). + // However this would be wrong, since typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some + // class derived from it). 
+ assert(LI.isFilter(i) && "Unsupported landingpad clause!"); + Value *FilterClause = LI.getClause(i); + ArrayType *FilterType = cast<ArrayType>(FilterClause->getType()); + unsigned NumTypeInfos = FilterType->getNumElements(); + + // An empty filter catches everything, so there is no point in keeping any + // following clauses or marking the landingpad as having a cleanup. By + // dealing with this case here the following code is made a bit simpler. + if (!NumTypeInfos) { + NewClauses.push_back(FilterClause); + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + + bool MakeNewFilter = false; // If true, make a new filter. + SmallVector<Constant *, 16> NewFilterElts; // New elements. + if (isa<ConstantAggregateZero>(FilterClause)) { + // Not an empty filter - it contains at least one null typeinfo. + assert(NumTypeInfos > 0 && "Should have handled empty filter already!"); + Constant *TypeInfo = + Constant::getNullValue(FilterType->getElementType()); + // If this typeinfo is a catch-all then the filter can never match. + if (isCatchAll(Personality, TypeInfo)) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // There is no point in having multiple copies of this typeinfo, so + // discard all but the first copy if there is more than one. + NewFilterElts.push_back(TypeInfo); + if (NumTypeInfos > 1) + MakeNewFilter = true; + } else { + ConstantArray *Filter = cast<ConstantArray>(FilterClause); + SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. + NewFilterElts.reserve(NumTypeInfos); + + // Remove any filter elements that were already caught or that already + // occurred in the filter. While there, see if any of the elements are + // catch-alls. If so, the filter can be discarded. 
+ bool SawCatchAll = false; + for (unsigned j = 0; j != NumTypeInfos; ++j) { + Value *Elt = Filter->getOperand(j); + Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts()); + if (isCatchAll(Personality, TypeInfo)) { + // This element is a catch-all. Bail out, noting this fact. + SawCatchAll = true; + break; + } + if (AlreadyCaught.count(TypeInfo)) + // Already caught by an earlier clause, so having it in the filter + // is pointless. + continue; + // There is no point in having multiple copies of the same typeinfo in + // a filter, so only add it if we didn't already. + if (SeenInFilter.insert(TypeInfo)) + NewFilterElts.push_back(cast<Constant>(Elt)); + } + // A filter containing a catch-all cannot match anything by definition. + if (SawCatchAll) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // If we dropped something from the filter, make a new one. + if (NewFilterElts.size() < NumTypeInfos) + MakeNewFilter = true; + } + if (MakeNewFilter) { + FilterType = ArrayType::get(FilterType->getElementType(), + NewFilterElts.size()); + FilterClause = ConstantArray::get(FilterType, NewFilterElts); + MakeNewInstruction = true; + } + + NewClauses.push_back(FilterClause); + + // If the new filter is empty then it will catch everything so there is + // no point in keeping any following clauses or marking the landingpad + // as having a cleanup. The case of the original filter being empty was + // already handled above. + if (MakeNewFilter && !NewFilterElts.size()) { + assert(MakeNewInstruction && "New filter but not a new instruction!"); + CleanupFlag = false; + break; + } + } + } + + // If several filters occur in a row then reorder them so that the shortest + // filters come first (those with the smallest number of elements). This is + // advantageous because shorter filters are more likely to match, speeding up + // unwinding, but mostly because it increases the effectiveness of the other + // filter optimizations below. 
+ for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { + unsigned j; + // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. + for (j = i; j != e; ++j) + if (!isa<ArrayType>(NewClauses[j]->getType())) + break; + + // Check whether the filters are already sorted by length. We need to know + // if sorting them is actually going to do anything so that we only make a + // new landingpad instruction if it does. + for (unsigned k = i; k + 1 < j; ++k) + if (shorter_filter(NewClauses[k+1], NewClauses[k])) { + // Not sorted, so sort the filters now. Doing an unstable sort would be + // correct too but reordering filters pointlessly might confuse users. + std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j, + shorter_filter); + MakeNewInstruction = true; + break; + } + + // Look for the next batch of filters. + i = j + 1; + } + + // If typeinfos matched if and only if equal, then the elements of a filter L + // that occurs later than a filter F could be replaced by the intersection of + // the elements of F and L. In reality two typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some class + // derived from it) so it would be wrong to perform this transform in general. + // However the transform is correct and useful if F is a subset of L. In that + // case L can be replaced by F, and thus removed altogether since repeating a + // filter is pointless. So here we look at all pairs of filters F and L where + // L follows F in the list of clauses, and remove L if every element of F is + // an element of L. This can occur when inlining C++ functions with exception + // specifications. + for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { + // Examine each filter in turn. + Value *Filter = NewClauses[i]; + ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType()); + if (!FTy) + // Not a filter - skip it. 
+ continue; + unsigned FElts = FTy->getNumElements(); + // Examine each filter following this one. Doing this backwards means that + // we don't have to worry about filters disappearing under us when removed. + for (unsigned j = NewClauses.size() - 1; j != i; --j) { + Value *LFilter = NewClauses[j]; + ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType()); + if (!LTy) + // Not a filter - skip it. + continue; + // If Filter is a subset of LFilter, i.e. every element of Filter is also + // an element of LFilter, then discard LFilter. + SmallVector<Value *, 16>::iterator J = NewClauses.begin() + j; + // If Filter is empty then it is a subset of LFilter. + if (!FElts) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + // Move on to the next filter. + continue; + } + unsigned LElts = LTy->getNumElements(); + // If Filter is longer than LFilter then it cannot be a subset of it. + if (FElts > LElts) + // Move on to the next filter. + continue; + // At this point we know that LFilter has at least one element. + if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros. + // Filter is a subset of LFilter iff Filter contains only zeros (as we + // already know that Filter is not longer than LFilter). + if (isa<ConstantAggregateZero>(Filter)) { + assert(FElts <= LElts && "Should have handled this case earlier!"); + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + continue; + } + ConstantArray *LArray = cast<ConstantArray>(LFilter); + if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros. + // Since Filter is non-empty and contains only zeros, it is a subset of + // LFilter iff LFilter contains a zero. + assert(FElts > 0 && "Should have eliminated the empty filter earlier!"); + for (unsigned l = 0; l != LElts; ++l) + if (LArray->getOperand(l)->isNullValue()) { + // LFilter contains a zero - discard it. 
+ NewClauses.erase(J); + MakeNewInstruction = true; + break; + } + // Move on to the next filter. + continue; + } + // At this point we know that both filters are ConstantArrays. Loop over + // operands to see whether every element of Filter is also an element of + // LFilter. Since filters tend to be short this is probably faster than + // using a method that scales nicely. + ConstantArray *FArray = cast<ConstantArray>(Filter); + bool AllFound = true; + for (unsigned f = 0; f != FElts; ++f) { + Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts(); + AllFound = false; + for (unsigned l = 0; l != LElts; ++l) { + Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts(); + if (LTypeInfo == FTypeInfo) { + AllFound = true; + break; + } + } + if (!AllFound) + break; + } + if (AllFound) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + } + } + + // If we changed any of the clauses, replace the old landingpad instruction + // with a new one. + if (MakeNewInstruction) { + LandingPadInst *NLI = LandingPadInst::Create(LI.getType(), + LI.getPersonalityFn(), + NewClauses.size()); + for (unsigned i = 0, e = NewClauses.size(); i != e; ++i) + NLI->addClause(NewClauses[i]); + // A landing pad with no clauses must have the cleanup flag set. It is + // theoretically possible, though highly unlikely, that we eliminated all + // clauses. If so, force the cleanup flag to true. + if (NewClauses.empty()) + CleanupFlag = true; + NLI->setCleanup(CleanupFlag); + return NLI; + } + + // Even if none of the clauses changed, we may nonetheless have understood + // that the cleanup flag is pointless. Clear it if so. + if (LI.isCleanup() != CleanupFlag) { + assert(!CleanupFlag && "Adding a cleanup, not removing one?!"); + LI.setCleanup(CleanupFlag); + return &LI; + } + return 0; } @@ -923,7 +1758,8 @@ assert(I->hasOneUse() && "Invariants didn't hold!"); // Cannot move control-flow-involving, volatile loads, vaarg, etc. 
- if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I)) + if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() || + isa<TerminatorInst>(I)) return false; // Do not sink alloca instructions out of the entry block. @@ -940,8 +1776,7 @@ return false; } - BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); - + BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); I->moveBefore(InsertPos); ++NumSunkInst; return true; @@ -964,12 +1799,10 @@ bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; Worklist.push_back(BB); - - std::vector<Instruction*> InstrsForInstCombineWorklist; - InstrsForInstCombineWorklist.reserve(128); - SmallPtrSet<ConstantExpr*, 64> FoldedConstants; - + SmallVector<Instruction*, 128> InstrsForInstCombineWorklist; + DenseMap<ConstantExpr*, Constant*> FoldedConstants; + do { BB = Worklist.pop_back_val(); @@ -1004,14 +1837,15 @@ i != e; ++i) { ConstantExpr *CE = dyn_cast<ConstantExpr>(i); if (CE == 0) continue; - - // If we already folded this constant, don't try again. - if (!FoldedConstants.insert(CE)) - continue; - - Constant *NewC = ConstantFoldConstantExpression(CE, TD); - if (NewC && NewC != CE) { - *i = NewC; + + Constant*& FoldRes = FoldedConstants[CE]; + if (!FoldRes) + FoldRes = ConstantFoldConstantExpression(CE, TD); + if (!FoldRes) + FoldRes = CE; + + if (FoldRes != CE) { + *i = FoldRes; MadeIRChange = true; } } @@ -1077,27 +1911,29 @@ // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents // the instcombine code from having to deal with some bad special cases. 
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (!Visited.count(BB)) { - Instruction *Term = BB->getTerminator(); - while (Term != BB->begin()) { // Remove instrs bottom-up - BasicBlock::iterator I = Term; --I; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (Visited.count(BB)) continue; - DEBUG(errs() << "IC: DCE: " << *I << '\n'); - // A debug intrinsic shouldn't force another iteration if we weren't - // going to do one without it. - if (!ISA_DEBUG_INFO_INTRINSIC(I)) { - ++NumDeadInst; - MadeIRChange = true; - } - - // If I is not void type then replaceAllUsesWith undef. - // This allows ValueHandlers and custom metadata to adjust itself. - if (!I->getType()->isVoidTy()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); - I->eraseFromParent(); + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. + Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != BB->begin()) { + // Delete the next to last instruction. + BasicBlock::iterator I = EndInst; + Instruction *Inst = --I; + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (isa<LandingPadInst>(Inst)) { + EndInst = Inst; + continue; } + if (!isa<DbgInfoIntrinsic>(Inst)) { + ++NumDeadInst; + MadeIRChange = true; + } + Inst->eraseFromParent(); } + } } while (!Worklist.isEmpty()) { @@ -1158,6 +1994,7 @@ // Now that we have an instruction, try combining it to simplify it. Builder->SetInsertPoint(I->getParent(), I); + Builder->SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG std::string OrigI; @@ -1172,23 +2009,26 @@ DEBUG(errs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); + if (!I->getDebugLoc().isUnknown()) + Result->setDebugLoc(I->getDebugLoc()); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); + // Move the name to the new instruction first. 
+ Result->takeName(I); + // Push the new instruction and any users onto the worklist. Worklist.Add(Result); Worklist.AddUsersToWorkList(*Result); - // Move the name to the new instruction first. - Result->takeName(I); - // Insert the new instruction into the basic block... BasicBlock *InstParent = I->getParent(); BasicBlock::iterator InsertPos = I; - if (!isa<PHINode>(Result)) // If combining a PHI, don't insert - while (isa<PHINode>(InsertPos)) // middle of a block of PHIs. - ++InsertPos; + // If we replace a PHI with something that isn't a PHI, fix up the + // insertion point. + if (!isa<PHINode>(Result) && isa<PHINode>(InsertPos)) + InsertPos = InstParent->getFirstInsertionPt(); InstParent->getInstList().insert(InsertPos, Result); @@ -1218,19 +2058,22 @@ bool InstCombiner::runOnFunction(Function &F) { - MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); TD = getAnalysisIfAvailable<TargetData>(); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. - IRBuilder<TargetFolder, InstCombineIRInserter> + IRBuilder<true, TargetFolder, InstCombineIRInserter> TheBuilder(F.getContext(), TargetFolder(TD), InstCombineIRInserter(Worklist)); Builder = &TheBuilder; bool EverMadeChange = false; + // Lower dbg.declare intrinsics otherwise their value may be clobbered + // by instcombiner. + EverMadeChange = LowerDbgDeclare(F); + // Iterate while there is work to do. unsigned Iteration = 0; while (DoOneIteration(F, Iteration++))
diff --git a/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj b/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj new file mode 100644 index 0000000..2fa2c4c --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj
@@ -0,0 +1,376 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Debug|x64"> + <Configuration>Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|Win32"> + <Configuration>Profile</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|x64"> + <Configuration>Profile</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|x64"> + <Configuration>Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{D35C7204-D4E0-4EE5-8B6D-BA1B589F5D36}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>LLVMInstCombine</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + 
<ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir> + 
<TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.lib</TargetExt> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</IntDir> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">.lib</TargetExt> + 
</PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Debug/LLVMInstCombine.pdb</ProgramDataBaseFileName> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + 
<AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + 
<ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Debug/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <OmitFramePointers>false</OmitFramePointers> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + 
<DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + 
</ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Profile|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstCombine.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemGroup> + <ClCompile Include="InstructionCombining.cpp" /> + <ClCompile Include="InstCombineAddSub.cpp" /> + <ClCompile Include="InstCombineAndOrXor.cpp" /> + <ClCompile Include="InstCombineCalls.cpp" /> + <ClCompile Include="InstCombineCasts.cpp" /> + <ClCompile Include="InstCombineCompares.cpp" /> + <ClCompile Include="InstCombineLoadStoreAlloca.cpp" /> + <ClCompile Include="InstCombineMulDivRem.cpp" /> + <ClCompile Include="InstCombinePHI.cpp" /> + <ClCompile Include="InstCombineSelect.cpp" /> + <ClCompile Include="InstCombineShifts.cpp" /> + <ClCompile Include="InstCombineSimplifyDemanded.cpp" /> + <ClCompile Include="InstCombineVectorOps.cpp" /> + <ClInclude Include="InstCombine.h" /> + <ClInclude Include="InstCombineWorklist.h" /> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\Analysis/LLVMAnalysis.vcxproj"> + <Project>97EDF19C-6360-4770-9255-EBA2F1A13E9B</Project> + </ProjectReference> + <ProjectReference Include="..\..\VMCore/LLVMCore.vcxproj"> + <Project>00F3295C-F7A0-43D3-BD0B-1BC0515B30E1</Project> + 
</ProjectReference> + <ProjectReference Include="..\..\Support/LLVMSupport.vcxproj"> + <Project>C688DD59-C6CB-4B33-B56F-A7D6F3761524</Project> + </ProjectReference> + <ProjectReference Include="..\..\Target/LLVMTarget.vcxproj"> + <Project>76F7B8C2-C825-40DC-BB68-9D987275E320</Project> + </ProjectReference> + <ProjectReference Include="..\Utils/LLVMTransformUtils.vcxproj"> + <Project>5C514254-58EE-4850-8743-F5D7BEAA3E66</Project> + </ProjectReference> + <ProjectReference Include="..\..\..\include/llvm/intrinsics_gen.vcxproj"> + <Project>E9B87B46-1EB0-4D95-9049-41B148FBADCD</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj.filters b/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj.filters new file mode 100644 index 0000000..a6ead53 --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/LLVMInstCombine.vcxproj.filters
@@ -0,0 +1,66 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <ClCompile Include="InstructionCombining.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineAddSub.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineAndOrXor.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineCalls.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineCasts.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineCompares.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineLoadStoreAlloca.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineMulDivRem.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombinePHI.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineSelect.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineShifts.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineSimplifyDemanded.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstCombineVectorOps.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + </ItemGroup> + <ItemGroup> + <ClInclude Include="InstCombine.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="InstCombineWorklist.h"> + <Filter>Header Files</Filter> + </ClInclude> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{1733179C-6FE4-462E-9EA5-4A29A1ACFE25}</UniqueIdentifier> + </Filter> + <Filter Include="Header Files"> + <UniqueIdentifier>{CFA0CD99-0550-4E94-A4D9-080C3F5D695C}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project> \ No 
newline at end of file
diff --git a/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj b/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj new file mode 100644 index 0000000..62e3273 --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj
@@ -0,0 +1,277 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>PACKAGE</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + 
</PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + 
</ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\InstCombine;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj.filters b/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj.filters new file mode 100644 index 0000000..a570359 --- /dev/null +++ b/src/LLVM/lib/Transforms/InstCombine/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/src/LLVM/lib/Transforms/Instrumentation/EdgeProfiling.cpp index a77d70c..e8ef265 100644 --- a/src/LLVM/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/src/LLVM/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -17,6 +17,7 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-edge-profiling" + #include "ProfilingUtils.h" #include "llvm/Module.h" #include "llvm/Pass.h" @@ -34,7 +35,9 @@ bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid - EdgeProfiler() : ModulePass(ID) {} + EdgeProfiler() : ModulePass(ID) { + initializeEdgeProfilerPass(*PassRegistry::getPassRegistry()); + } virtual const char *getPassName() const { return "Edge Profiler"; @@ -44,7 +47,7 @@ char EdgeProfiler::ID = 0; INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling", - "Insert instrumentation for edge profiling", false, false); + "Insert instrumentation for edge profiling", false, false) ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } @@ -71,7 +74,7 @@ } } - const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges); + Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges); GlobalVariable *Counters = new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, Constant::getNullValue(ATy), "EdgeProfCounters"); @@ -98,7 +101,7 @@ // otherwise insert it in the successor block. if (TI->getNumSuccessors() == 1) { // Insert counter at the start of the block - IncrementCounterInBlock(BB, i++, Counters); + IncrementCounterInBlock(BB, i++, Counters, false); } else { // Insert counter at the start of the block IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
diff --git a/src/LLVM/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/src/LLVM/lib/Transforms/Instrumentation/GCOVProfiling.cpp new file mode 100644 index 0000000..ccf7e11 --- /dev/null +++ b/src/LLVM/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -0,0 +1,668 @@ +//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements GCOV-style profiling. When this pass is run it emits +// "gcno" files next to the existing source, and instruments the code that runs +// to record the edges between blocks that run and emit a complementary "gcda" +// file on exit. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "insert-gcov-profiling" + +#include "ProfilingUtils.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/PathV2.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/UniqueVector.h" +#include <string> +#include <utility> +using namespace llvm; + +namespace { + class GCOVProfiler : public ModulePass { + public: + static char ID; + GCOVProfiler() + : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) { + initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + } + GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false) + : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), + Use402Format(use402Format) { + assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); + initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + } + virtual const char 
*getPassName() const { + return "GCOV Profiler"; + } + + private: + bool runOnModule(Module &M); + + // Create the GCNO files for the Module based on DebugInfo. + void emitGCNO(); + + // Modify the program to track transitions along edges and call into the + // profiling runtime to emit .gcda files when run. + bool emitProfileArcs(); + + // Get pointers to the functions in the runtime library. + Constant *getStartFileFunc(); + Constant *getIncrementIndirectCounterFunc(); + Constant *getEmitFunctionFunc(); + Constant *getEmitArcsFunc(); + Constant *getEndFileFunc(); + + // Create or retrieve an i32 state value that is used to represent the + // pred block number for certain non-trivial edges. + GlobalVariable *getEdgeStateValue(); + + // Produce a table of pointers to counters, by predecessor and successor + // block number. + GlobalVariable *buildEdgeLookupTable(Function *F, + GlobalVariable *Counter, + const UniqueVector<BasicBlock *> &Preds, + const UniqueVector<BasicBlock *> &Succs); + + // Add the function to write out all our counters to the global destructor + // list. 
+ void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *, + MDNode *>, 8> &); + + std::string mangleName(DICompileUnit CU, std::string NewStem); + + bool EmitNotes; + bool EmitData; + bool Use402Format; + + Module *M; + LLVMContext *Ctx; + }; +} + +char GCOVProfiler::ID = 0; +INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", + "Insert instrumentation for GCOV profiling", false, false) + +ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, + bool Use402Format) { + return new GCOVProfiler(EmitNotes, EmitData, Use402Format); +} + +namespace { + class GCOVRecord { + protected: + static const char *LinesTag; + static const char *FunctionTag; + static const char *BlockTag; + static const char *EdgeTag; + + GCOVRecord() {} + + void writeBytes(const char *Bytes, int Size) { + os->write(Bytes, Size); + } + + void write(uint32_t i) { + writeBytes(reinterpret_cast<char*>(&i), 4); + } + + // Returns the length measured in 4-byte blocks that will be used to + // represent this string in a GCOV file + unsigned lengthOfGCOVString(StringRef s) { + // A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs + // padding out to the next 4-byte word. The length is measured in 4-byte + // words including padding, not bytes of actual string. + return (s.size() / 4) + 1; + } + + void writeGCOVString(StringRef s) { + uint32_t Len = lengthOfGCOVString(s); + write(Len); + writeBytes(s.data(), s.size()); + + // Write 1 to 4 bytes of NUL padding. 
+ assert((unsigned)(4 - (s.size() % 4)) > 0); + assert((unsigned)(4 - (s.size() % 4)) <= 4); + writeBytes("\0\0\0\0", 4 - (s.size() % 4)); + } + + raw_ostream *os; + }; + const char *GCOVRecord::LinesTag = "\0\0\x45\x01"; + const char *GCOVRecord::FunctionTag = "\0\0\0\1"; + const char *GCOVRecord::BlockTag = "\0\0\x41\x01"; + const char *GCOVRecord::EdgeTag = "\0\0\x43\x01"; + + class GCOVFunction; + class GCOVBlock; + + // Constructed only by requesting it from a GCOVBlock, this object stores a + // list of line numbers and a single filename, representing lines that belong + // to the block. + class GCOVLines : public GCOVRecord { + public: + void addLine(uint32_t Line) { + Lines.push_back(Line); + } + + uint32_t length() { + // Here 2 = 1 for string length + 1 for '0' id#. + return lengthOfGCOVString(Filename) + 2 + Lines.size(); + } + + void writeOut() { + write(0); + writeGCOVString(Filename); + for (int i = 0, e = Lines.size(); i != e; ++i) + write(Lines[i]); + } + + GCOVLines(StringRef F, raw_ostream *os) + : Filename(F) { + this->os = os; + } + + private: + StringRef Filename; + SmallVector<uint32_t, 32> Lines; + }; + + // Represent a basic block in GCOV. Each block has a unique number in the + // function, number of lines belonging to each block, and a set of edges to + // other blocks. 
+ class GCOVBlock : public GCOVRecord { + public: + GCOVLines &getFile(StringRef Filename) { + GCOVLines *&Lines = LinesByFile[Filename]; + if (!Lines) { + Lines = new GCOVLines(Filename, os); + } + return *Lines; + } + + void addEdge(GCOVBlock &Successor) { + OutEdges.push_back(&Successor); + } + + void writeOut() { + uint32_t Len = 3; + for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(), + E = LinesByFile.end(); I != E; ++I) { + Len += I->second->length(); + } + + writeBytes(LinesTag, 4); + write(Len); + write(Number); + for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(), + E = LinesByFile.end(); I != E; ++I) + I->second->writeOut(); + write(0); + write(0); + } + + ~GCOVBlock() { + DeleteContainerSeconds(LinesByFile); + } + + private: + friend class GCOVFunction; + + GCOVBlock(uint32_t Number, raw_ostream *os) + : Number(Number) { + this->os = os; + } + + uint32_t Number; + StringMap<GCOVLines *> LinesByFile; + SmallVector<GCOVBlock *, 4> OutEdges; + }; + + // A function has a unique identifier, a checksum (we leave as zero) and a + // set of blocks and a map of edges between blocks. This is the only GCOV + // object users can construct, the blocks and lines will be rooted here. + class GCOVFunction : public GCOVRecord { + public: + GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) { + this->os = os; + + Function *F = SP.getFunction(); + uint32_t i = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + Blocks[BB] = new GCOVBlock(i++, os); + } + ReturnBlock = new GCOVBlock(i++, os); + + writeBytes(FunctionTag, 4); + uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + + 1 + lengthOfGCOVString(SP.getFilename()) + 1; + if (!Use402Format) + ++BlockLen; // For second checksum. 
+ write(BlockLen); + uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP); + write(Ident); + write(0); // checksum #1 + if (!Use402Format) + write(0); // checksum #2 + writeGCOVString(SP.getName()); + writeGCOVString(SP.getFilename()); + write(SP.getLineNumber()); + } + + ~GCOVFunction() { + DeleteContainerSeconds(Blocks); + delete ReturnBlock; + } + + GCOVBlock &getBlock(BasicBlock *BB) { + return *Blocks[BB]; + } + + GCOVBlock &getReturnBlock() { + return *ReturnBlock; + } + + void writeOut() { + // Emit count of blocks. + writeBytes(BlockTag, 4); + write(Blocks.size() + 1); + for (int i = 0, e = Blocks.size() + 1; i != e; ++i) { + write(0); // No flags on our blocks. + } + + // Emit edges between blocks. + for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) { + GCOVBlock &Block = *I->second; + if (Block.OutEdges.empty()) continue; + + writeBytes(EdgeTag, 4); + write(Block.OutEdges.size() * 2 + 1); + write(Block.Number); + for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) { + write(Block.OutEdges[i]->Number); + write(0); // no flags + } + } + + // Emit lines for each block. 
+ for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) { + I->second->writeOut(); + } + } + + private: + DenseMap<BasicBlock *, GCOVBlock *> Blocks; + GCOVBlock *ReturnBlock; + }; +} + +std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) { + if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) { + for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) { + MDNode *N = GCov->getOperand(i); + if (N->getNumOperands() != 2) continue; + MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0)); + MDNode *CompileUnit = dyn_cast<MDNode>(N->getOperand(1)); + if (!GCovFile || !CompileUnit) continue; + if (CompileUnit == CU) { + SmallString<128> Filename = GCovFile->getString(); + sys::path::replace_extension(Filename, NewStem); + return Filename.str(); + } + } + } + + SmallString<128> Filename = CU.getFilename(); + sys::path::replace_extension(Filename, NewStem); + return sys::path::filename(Filename.str()); +} + +bool GCOVProfiler::runOnModule(Module &M) { + this->M = &M; + Ctx = &M.getContext(); + + if (EmitNotes) emitGCNO(); + if (EmitData) return emitProfileArcs(); + return false; +} + +void GCOVProfiler::emitGCNO() { + DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles; + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (CU_Nodes) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + // Each compile unit gets its own .gcno file. This means that whether we run + // this pass over the original .o's as they're produced, or run it after + // LTO, we'll generate the same .gcno files. 
+ + DICompileUnit CU(CU_Nodes->getOperand(i)); + raw_fd_ostream *&out = GcnoFiles[CU]; + std::string ErrorInfo; + out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo, + raw_fd_ostream::F_Binary); + if (!Use402Format) + out->write("oncg*404MVLL", 12); + else + out->write("oncg*204MVLL", 12); + + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (!SP.Verify()) continue; + raw_fd_ostream *&os = GcnoFiles[CU]; + + Function *F = SP.getFunction(); + if (!F) continue; + GCOVFunction Func(SP, os, Use402Format); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + GCOVBlock &Block = Func.getBlock(BB); + TerminatorInst *TI = BB->getTerminator(); + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); + } + } else if (isa<ReturnInst>(TI)) { + Block.addEdge(Func.getReturnBlock()); + } + + uint32_t Line = 0; + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Line == Loc.getLine()) continue; + Line = Loc.getLine(); + if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; + + GCOVLines &Lines = Block.getFile(SP.getFilename()); + Lines.addLine(Loc.getLine()); + } + } + Func.writeOut(); + } + } + } + + for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator + I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) { + raw_fd_ostream *&out = I->second; + out->write("\0\0\0\0\0\0\0\0", 8); // EOF + out->close(); + delete out; + } +} + +bool GCOVProfiler::emitProfileArcs() { + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (!CU_Nodes) return false; + + bool Result = false; + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CU(CU_Nodes->getOperand(i)); + DIArray SPs = CU.getSubprograms(); + 
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (!SP.Verify()) continue; + Function *F = SP.getFunction(); + if (!F) continue; + if (!Result) Result = true; + unsigned Edges = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (isa<ReturnInst>(TI)) + ++Edges; + else + Edges += TI->getNumSuccessors(); + } + + ArrayType *CounterTy = + ArrayType::get(Type::getInt64Ty(*Ctx), Edges); + GlobalVariable *Counters = + new GlobalVariable(*M, CounterTy, false, + GlobalValue::InternalLinkage, + Constant::getNullValue(CounterTy), + "__llvm_gcov_ctr", 0, false, 0); + CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); + + UniqueVector<BasicBlock *> ComplexEdgePreds; + UniqueVector<BasicBlock *> ComplexEdgeSuccs; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); + if (Successors) { + IRBuilder<> Builder(TI); + + if (Successors == 1) { + Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + Value *Sel = Builder.CreateSelect( + BI->getCondition(), + ConstantInt::get(Type::getInt64Ty(*Ctx), Edge), + ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1)); + SmallVector<Value *, 2> Idx; + Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); + Idx.push_back(Sel); + Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else { + ComplexEdgePreds.insert(BB); + for (int i = 0; i != Successors; ++i) + ComplexEdgeSuccs.insert(TI->getSuccessor(i)); + } + Edge += Successors; + } + } + + if (!ComplexEdgePreds.empty()) { + GlobalVariable *EdgeTable = + buildEdgeLookupTable(F, Counters, + ComplexEdgePreds, ComplexEdgeSuccs); + GlobalVariable *EdgeState = getEdgeStateValue(); + + Type *Int32Ty = Type::getInt32Ty(*Ctx); + for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { + IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); + Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); + } + for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { + // call runtime to perform increment + BasicBlock::iterator InsertPt = + ComplexEdgeSuccs[i+1]->getFirstInsertionPt(); + IRBuilder<> Builder(InsertPt); + Value *CounterPtrArray = + Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, + i * ComplexEdgePreds.size()); + Builder.CreateCall2(getIncrementIndirectCounterFunc(), + EdgeState, CounterPtrArray); + // clear the predecessor number + 
Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState); + } + } + } + insertCounterWriteout(CountersBySP); + } + return Result; +} + +// All edges with successors that aren't branches are "complex", because it +// requires complex logic to pick which counter to update. +GlobalVariable *GCOVProfiler::buildEdgeLookupTable( + Function *F, + GlobalVariable *Counters, + const UniqueVector<BasicBlock *> &Preds, + const UniqueVector<BasicBlock *> &Succs) { + // TODO: support invoke, threads. We rely on the fact that nothing can modify + // the whole-Module pred edge# between the time we set it and the time we next + // read it. Threads and invoke make this untrue. + + // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]]. + Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + ArrayType *EdgeTableTy = ArrayType::get( + Int64PtrTy, Succs.size() * Preds.size()); + + Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()]; + Constant *NullValue = Constant::getNullValue(Int64PtrTy); + for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i) + EdgeTable[i] = NullValue; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); + if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) { + for (int i = 0; i != Successors; ++i) { + BasicBlock *Succ = TI->getSuccessor(i); + IRBuilder<> builder(Succ); + Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge + i); + EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + + (Preds.idFor(BB)-1)] = cast<Constant>(Counter); + } + } + Edge += Successors; + } + + ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size()); + GlobalVariable *EdgeTableGV = + new GlobalVariable( + *M, EdgeTableTy, true, GlobalValue::InternalLinkage, + ConstantArray::get(EdgeTableTy, V), + "__llvm_gcda_edge_table"); + EdgeTableGV->setUnnamedAddr(true); + return EdgeTableGV; +} + +Constant *GCOVProfiler::getStartFileFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Type::getInt8PtrTy(*Ctx), false); + return M->getOrInsertFunction("llvm_gcda_start_file", FTy); +} + +Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { + Type *Args[] = { + Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor + Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row + }; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy); +} + +Constant *GCOVProfiler::getEmitFunctionFunc() { + Type *Args[2] = { + Type::getInt32Ty(*Ctx), // uint32_t ident + Type::getInt8PtrTy(*Ctx), // const char *function_name + }; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); +} + +Constant *GCOVProfiler::getEmitArcsFunc() { + Type *Args[] = { + Type::getInt32Ty(*Ctx), // uint32_t num_counters + Type::getInt64PtrTy(*Ctx), // uint64_t *counters + }; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); +} + +Constant 
*GCOVProfiler::getEndFileFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_gcda_end_file", FTy); +} + +GlobalVariable *GCOVProfiler::getEdgeStateValue() { + GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred"); + if (!GV) { + GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false, + GlobalValue::InternalLinkage, + ConstantInt::get(Type::getInt32Ty(*Ctx), + 0xffffffff), + "__llvm_gcov_global_state_pred"); + GV->setUnnamedAddr(true); + } + return GV; +} + +void GCOVProfiler::insertCounterWriteout( + SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) { + FunctionType *WriteoutFTy = + FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *WriteoutF = Function::Create(WriteoutFTy, + GlobalValue::InternalLinkage, + "__llvm_gcov_writeout", M); + WriteoutF->setUnnamedAddr(true); + BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF); + IRBuilder<> Builder(BB); + + Constant *StartFile = getStartFileFunc(); + Constant *EmitFunction = getEmitFunctionFunc(); + Constant *EmitArcs = getEmitArcsFunc(); + Constant *EndFile = getEndFileFunc(); + + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (CU_Nodes) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit compile_unit(CU_Nodes->getOperand(i)); + std::string FilenameGcda = mangleName(compile_unit, "gcda"); + Builder.CreateCall(StartFile, + Builder.CreateGlobalStringPtr(FilenameGcda)); + for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator + I = CountersBySP.begin(), E = CountersBySP.end(); + I != E; ++I) { + DISubprogram SP(I->second); + intptr_t ident = reinterpret_cast<intptr_t>(I->second); + Builder.CreateCall2(EmitFunction, + ConstantInt::get(Type::getInt32Ty(*Ctx), ident), + Builder.CreateGlobalStringPtr(SP.getName())); + + GlobalVariable *GV = I->first; + unsigned Arcs = + cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); 
+ Builder.CreateCall2(EmitArcs, + ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), + Builder.CreateConstGEP2_64(GV, 0, 0)); + } + Builder.CreateCall(EndFile); + } + } + Builder.CreateRetVoid(); + + InsertProfilingShutdownCall(WriteoutF, M); +}
diff --git a/src/LLVM/lib/Transforms/Instrumentation/INSTALL.vcxproj b/src/LLVM/lib/Transforms/Instrumentation/INSTALL.vcxproj new file mode 100644 index 0000000..168a3b9 --- /dev/null +++ b/src/LLVM/lib/Transforms/Instrumentation/INSTALL.vcxproj
@@ -0,0 +1,261 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>INSTALL</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + 
<AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Instrumentation/INSTALL.vcxproj.filters b/src/LLVM/lib/Transforms/Instrumentation/INSTALL.vcxproj.filters new file mode 100644 index 0000000..251dd1d --- /dev/null +++ b/src/LLVM/lib/Transforms/Instrumentation/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Instrumentation/Instrumentation.cpp b/src/LLVM/lib/Transforms/Instrumentation/Instrumentation.cpp new file mode 100644 index 0000000..71adc1e --- /dev/null +++ b/src/LLVM/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -0,0 +1,33 @@ +//===-- Instrumentation.cpp - Instrumentation Infrastructure --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the common initialization infrastructure for the +// Instrumentation library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeInstrumentation - Initialize all passes in the Instrumentation +/// library. +void llvm::initializeInstrumentation(PassRegistry &Registry) { + initializeEdgeProfilerPass(Registry); + initializeOptimalEdgeProfilerPass(Registry); + initializePathProfilerPass(Registry); + initializeGCOVProfilerPass(Registry); +} + +/// LLVMInitializeInstrumentation - C binding for +/// initializeInstrumentation. +void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) { + initializeInstrumentation(*unwrap(R)); +}
diff --git a/src/LLVM/lib/Transforms/Instrumentation/LLVMInstrumentation.vcxproj b/src/LLVM/lib/Transforms/Instrumentation/LLVMInstrumentation.vcxproj new file mode 100644 index 0000000..2073886 --- /dev/null +++ b/src/LLVM/lib/Transforms/Instrumentation/LLVMInstrumentation.vcxproj
@@ -0,0 +1,366 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Debug|x64"> + <Configuration>Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|Win32"> + <Configuration>Profile</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|x64"> + <Configuration>Profile</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|x64"> + <Configuration>Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{E7674720-3108-434D-B5B5-B4C9CFDAD42E}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>LLVMInstrumentation</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + 
<ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir> + 
<TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.lib</TargetExt> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</IntDir> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">.lib</TargetExt> + 
</PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Debug/LLVMInstrumentation.pdb</ProgramDataBaseFileName> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + 
<AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + 
<ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Debug/LLVMInstrumentation.pdb</ProgramDataBaseFileName> + <OmitFramePointers>false</OmitFramePointers> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + 
<WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstrumentation.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + 
<ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstrumentation.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstrumentation.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Profile|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMInstrumentation.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemGroup> + <ClCompile Include="EdgeProfiling.cpp" /> + <ClCompile Include="GCOVProfiling.cpp" /> + <ClCompile Include="Instrumentation.cpp" /> + <ClCompile Include="OptimalEdgeProfiling.cpp" /> + <ClCompile Include="PathProfiling.cpp" /> + <ClCompile Include="ProfilingUtils.cpp" /> + <ClInclude Include="MaximumSpanningTree.h" /> + <ClInclude Include="ProfilingUtils.h" /> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\Analysis/LLVMAnalysis.vcxproj"> + <Project>97EDF19C-6360-4770-9255-EBA2F1A13E9B</Project> + </ProjectReference> + <ProjectReference Include="..\..\VMCore/LLVMCore.vcxproj"> + <Project>00F3295C-F7A0-43D3-BD0B-1BC0515B30E1</Project> + </ProjectReference> + <ProjectReference Include="..\..\Support/LLVMSupport.vcxproj"> + <Project>C688DD59-C6CB-4B33-B56F-A7D6F3761524</Project> + </ProjectReference> + <ProjectReference Include="..\Utils/LLVMTransformUtils.vcxproj"> + <Project>5C514254-58EE-4850-8743-F5D7BEAA3E66</Project> + </ProjectReference> + <ProjectReference 
Include="..\..\..\include/llvm/intrinsics_gen.vcxproj"> + <Project>E9B87B46-1EB0-4D95-9049-41B148FBADCD</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Instrumentation/LLVMInstrumentation.vcxproj.filters b/src/LLVM/lib/Transforms/Instrumentation/LLVMInstrumentation.vcxproj.filters new file mode 100644 index 0000000..7dee64f --- /dev/null +++ b/src/LLVM/lib/Transforms/Instrumentation/LLVMInstrumentation.vcxproj.filters
@@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <ClCompile Include="EdgeProfiling.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="GCOVProfiling.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="Instrumentation.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="OptimalEdgeProfiling.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="PathProfiling.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ProfilingUtils.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + </ItemGroup> + <ItemGroup> + <ClInclude Include="MaximumSpanningTree.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="ProfilingUtils.h"> + <Filter>Header Files</Filter> + </ClInclude> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{1733179C-6FE4-462E-9EA5-4A29A1ACFE25}</UniqueIdentifier> + </Filter> + <Filter Include="Header Files"> + <UniqueIdentifier>{CFA0CD99-0550-4E94-A4D9-080C3F5D695C}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/src/LLVM/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 829da6b..f76c77e 100644 --- a/src/LLVM/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/src/LLVM/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This module privides means for calculating a maximum spanning tree for a +// This module provides means for calculating a maximum spanning tree for a // given set of weighted edges. The type parameter T is the type of a node. // //===----------------------------------------------------------------------===//
diff --git a/src/LLVM/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/src/LLVM/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 8eec987..62c21b8 100644 --- a/src/LLVM/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/src/LLVM/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-optimal-edge-profiling" #include "ProfilingUtils.h" +#include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" @@ -26,7 +27,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "MaximumSpanningTree.h" -#include <set> using namespace llvm; STATISTIC(NumEdgesInserted, "The # of edges inserted."); @@ -36,7 +36,9 @@ bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid - OptimalEdgeProfiler() : ModulePass(ID) {} + OptimalEdgeProfiler() : ModulePass(ID) { + initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry()); + } void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(ProfileEstimatorPassID); @@ -50,9 +52,14 @@ } char OptimalEdgeProfiler::ID = 0; -INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling", +INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling", "Insert optimal instrumentation for edge profiling", - false, false); + false, false) +INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass) +INITIALIZE_AG_DEPENDENCY(ProfileInfo) +INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling", + "Insert optimal instrumentation for edge profiling", + false, false) ModulePass *llvm::createOptimalEdgeProfilerPass() { return new OptimalEdgeProfiler(); @@ -105,44 +112,44 @@ // be calculated from other edge counters on reading the profile info back // in. 
- const Type *Int32 = Type::getInt32Ty(M.getContext()); - const ArrayType *ATy = ArrayType::get(Int32, NumEdges); + Type *Int32 = Type::getInt32Ty(M.getContext()); + ArrayType *ATy = ArrayType::get(Int32, NumEdges); GlobalVariable *Counters = new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, Constant::getNullValue(ATy), "OptEdgeProfCounters"); NumEdgesInserted = 0; std::vector<Constant*> Initializer(NumEdges); - Constant* Zero = ConstantInt::get(Int32, 0); - Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); + Constant *Zero = ConstantInt::get(Int32, 0); + Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); // Instrument all of the edges not in MST... unsigned i = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n"); // Calculate a Maximum Spanning Tree with the edge weights determined by // ProfileEstimator. ProfileEstimator also assign weights to the virtual // edges (0,entry) and (BB,0) (for blocks with no successors) and this - // edges also participate in the maximum spanning tree calculation. + // edges also participate in the maximum spanning tree calculation. // The third parameter of MaximumSpanningTree() has the effect that not the // actual MST is returned but the edges _not_ in the MST. - ProfileInfo::EdgeWeights ECs = + ProfileInfo::EdgeWeights ECs = getAnalysis<ProfileInfo>(*F).getEdgeWeights(F); std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end()); - MaximumSpanningTree<BasicBlock> MST (EdgeVector); - std::stable_sort(MST.begin(),MST.end()); + MaximumSpanningTree<BasicBlock> MST(EdgeVector); + std::stable_sort(MST.begin(), MST.end()); // Check if (0,entry) not in the MST. 
If not, instrument edge // (IncrementCounterInBlock()) and set the counter initially to zero, if // the edge is in the MST the counter is initialised to -1. BasicBlock *entry = &(F->getEntryBlock()); - ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry); + ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry); if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge,entry,i); + printEdgeCounter(edge, entry, i); IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ @@ -163,9 +170,9 @@ // has no successors, the virtual edge (BB,0) is processed. TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 0) { - ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0); + ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0); if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge,BB,i); + printEdgeCounter(edge, BB, i); IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ @@ -188,11 +195,11 @@ // otherwise insert it in the successor block. if (TI->getNumSuccessors() == 1) { // Insert counter at the start of the block - printEdgeCounter(edge,BB,i); + printEdgeCounter(edge, BB, i); IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; } else { // Insert counter at the start of the block - printEdgeCounter(edge,Succ,i); + printEdgeCounter(edge, Succ, i); IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted; } Initializer[i++] = (Zero); @@ -205,9 +212,9 @@ // Check if the number of edges counted at first was the number of edges we // considered for instrumentation. - assert(i==NumEdges && "the number of edges in counting array is wrong"); + assert(i == NumEdges && "the number of edges in counting array is wrong"); - // Assing the now completely defined initialiser to the array. + // Assign the now completely defined initialiser to the array. 
Constant *init = ConstantArray::get(ATy, Initializer); Counters->setInitializer(init);
diff --git a/src/LLVM/lib/Transforms/Instrumentation/PACKAGE.vcxproj b/src/LLVM/lib/Transforms/Instrumentation/PACKAGE.vcxproj new file mode 100644 index 0000000..af070d0 --- /dev/null +++ b/src/LLVM/lib/Transforms/Instrumentation/PACKAGE.vcxproj
@@ -0,0 +1,277 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>PACKAGE</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + 
</PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + 
</PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Instrumentation;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Instrumentation/PACKAGE.vcxproj.filters b/src/LLVM/lib/Transforms/Instrumentation/PACKAGE.vcxproj.filters new file mode 100644 index 0000000..a570359 --- /dev/null +++ b/src/LLVM/lib/Transforms/Instrumentation/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Instrumentation/PathProfiling.cpp b/src/LLVM/lib/Transforms/Instrumentation/PathProfiling.cpp new file mode 100644 index 0000000..23915d3 --- /dev/null +++ b/src/LLVM/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -0,0 +1,1424 @@ +//===- PathProfiling.cpp - Inserts counters for path profiling ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass instruments functions for Ball-Larus path profiling. Ball-Larus +// profiling converts the CFG into a DAG by replacing backedges with edges +// from entry to the start block and from the end block to exit. The paths +// along the new DAG are enumerated, i.e. each path is given a path number. +// Edges are instrumented to increment the path number register, such that the +// path number register will equal the path number of the path taken at the +// exit. +// +// This file defines classes for building a CFG for use with different stages +// in the Ball-Larus path profiling instrumentation [Ball96]. The +// requirements are formatting the llvm CFG into the Ball-Larus DAG, path +// numbering, finding a spanning tree, moving increments from the spanning +// tree to chords. +// +// Terms: +// DAG - Directed Acyclic Graph. +// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges +// removed in the following manner. For every backedge +// v->w, insert edge ENTRY->w and edge v->EXIT. +// Path Number - The number corresponding to a specific path through a +// Ball-Larus DAG. +// Spanning Tree - A subgraph, S, is a spanning tree if S covers all +// vertices and is a tree. +// Chord - An edge not in the spanning tree. +// +// [Ball96] +// T. Ball and J. R. Larus. "Efficient Path Profiling." +// International Symposium on Microarchitecture, pages 46-57, 1996. +// http://portal.acm.org/citation.cfm?id=243857 +// +// [Ball94] +// Thomas Ball. "Efficiently Counting Program Events with Support for +// On-line queries." 
+// ACM Transactions on Programming Languages and Systems, Vol 16, No 5, +// September 1994, Pages 1399-1410. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "insert-path-profiling" + +#include "llvm/DerivedTypes.h" +#include "ProfilingUtils.h" +#include "llvm/Analysis/PathNumbering.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/InstrTypes.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TypeBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Instrumentation.h" +#include <vector> + +#define HASH_THRESHHOLD 100000 + +using namespace llvm; + +namespace { +class BLInstrumentationNode; +class BLInstrumentationEdge; +class BLInstrumentationDag; + +// --------------------------------------------------------------------------- +// BLInstrumentationNode extends BallLarusNode with members used by the +// instrumentation algorithms. +// --------------------------------------------------------------------------- +class BLInstrumentationNode : public BallLarusNode { +public: + // Creates a new BLInstrumentationNode from a BasicBlock. + BLInstrumentationNode(BasicBlock* BB); + + // Get/sets the Value corresponding to the pathNumber register, + // constant or phinode. Used by the instrumentation code to remember + // path number Values. + Value* getStartingPathNumber(); + void setStartingPathNumber(Value* pathNumber); + + Value* getEndingPathNumber(); + void setEndingPathNumber(Value* pathNumber); + + // Get/set the PHINode Instruction for this node. + PHINode* getPathPHI(); + void setPathPHI(PHINode* pathPHI); + +private: + + Value* _startingPathNumber; // The Value for the current pathNumber. 
+ Value* _endingPathNumber; // The Value for the current pathNumber. + PHINode* _pathPHI; // The PHINode for current pathNumber. +}; + +// -------------------------------------------------------------------------- +// BLInstrumentationEdge extends BallLarusEdge with data about the +// instrumentation that will end up on each edge. +// -------------------------------------------------------------------------- +class BLInstrumentationEdge : public BallLarusEdge { +public: + BLInstrumentationEdge(BLInstrumentationNode* source, + BLInstrumentationNode* target); + + // Sets the target node of this edge. Required to split edges. + void setTarget(BallLarusNode* node); + + // Get/set whether edge is in the spanning tree. + bool isInSpanningTree() const; + void setIsInSpanningTree(bool isInSpanningTree); + + // Get/ set whether this edge will be instrumented with a path number + // initialization. + bool isInitialization() const; + void setIsInitialization(bool isInitialization); + + // Get/set whether this edge will be instrumented with a path counter + // increment. Notice this is incrementing the path counter + // corresponding to the path number register. The path number + // increment is determined by getIncrement(). + bool isCounterIncrement() const; + void setIsCounterIncrement(bool isCounterIncrement); + + // Get/set the path number increment that this edge will be instrumented + // with. This is distinct from the path counter increment and the + // weight. The counter increment counts the number of executions of + // some path, whereas the path number keeps track of which path number + // the program is on. + long getIncrement() const; + void setIncrement(long increment); + + // Get/set whether the edge has been instrumented. + bool hasInstrumentation(); + void setHasInstrumentation(bool hasInstrumentation); + + // Returns the successor number of this edge in the source. 
+ unsigned getSuccessorNumber(); + +private: + // The increment that the code will be instrumented with. + long long _increment; + + // Whether this edge is in the spanning tree. + bool _isInSpanningTree; + + // Whether this edge is an initialization of the path number. + bool _isInitialization; + + // Whether this edge is a path counter increment. + bool _isCounterIncrement; + + // Whether this edge has been instrumented. + bool _hasInstrumentation; +}; + +// --------------------------------------------------------------------------- +// BLInstrumentationDag extends BallLarusDag with algorithms that +// determine where instrumentation should be placed. +// --------------------------------------------------------------------------- +class BLInstrumentationDag : public BallLarusDag { +public: + BLInstrumentationDag(Function &F); + + // Returns the Exit->Root edge. This edge is required for creating + // directed cycles in the algorithm for moving instrumentation off of + // the spanning tree + BallLarusEdge* getExitRootEdge(); + + // Returns an array of phony edges which mark those nodes + // with function calls + BLEdgeVector getCallPhonyEdges(); + + // Gets/sets the path counter array + GlobalVariable* getCounterArray(); + void setCounterArray(GlobalVariable* c); + + // Calculates the increments for the chords, thereby removing + // instrumentation from the spanning tree edges. Implementation is based + // on the algorithm in Figure 4 of [Ball94] + void calculateChordIncrements(); + + // Updates the state when an edge has been split + void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock); + + // Calculates a spanning tree of the DAG ignoring cycles. Whichever + // edges are in the spanning tree will not be instrumented, but this + // implementation does not try to minimize the instrumentation overhead + // by trying to find hot edges. 
+ void calculateSpanningTree(); + + // Pushes initialization further down in order to group the first + // increment and initialization. + void pushInitialization(); + + // Pushes the path counter increments up in order to group the last path + // number increment. + void pushCounters(); + + // Removes phony edges from the successor list of the source, and the + // predecessor list of the target. + void unlinkPhony(); + + // Generate dot graph for the function + void generateDotGraph(); + +protected: + // BLInstrumentationDag creates BLInstrumentationNode objects in this + // method overriding the creation of BallLarusNode objects. + // + // Allows subclasses to determine which type of Node is created. + // Override this method to produce subclasses of BallLarusNode if + // necessary. + virtual BallLarusNode* createNode(BasicBlock* BB); + + // BLInstrumentationDag create BLInstrumentationEdges. + // + // Allows subclasses to determine which type of Edge is created. + // Override this method to produce subclasses of BallLarusEdge if + // necessary. Parameters source and target will have been created by + // createNode and can be cast to the subclass of BallLarusNode* + // returned by createNode. + virtual BallLarusEdge* createEdge( + BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber); + +private: + BLEdgeVector _treeEdges; // All edges in the spanning tree. + BLEdgeVector _chordEdges; // All edges not in the spanning tree. + GlobalVariable* _counterArray; // Array to store path counters + + // Removes the edge from the appropriate predecessor and successor lists. + void unlinkEdge(BallLarusEdge* edge); + + // Makes an edge part of the spanning tree. + void makeEdgeSpanning(BLInstrumentationEdge* edge); + + // Pushes initialization and calls itself recursively. + void pushInitializationFromEdge(BLInstrumentationEdge* edge); + + // Pushes path counter increments up recursively. 
+ void pushCountersFromEdge(BLInstrumentationEdge* edge); + + // Depth first algorithm for determining the chord increments. + void calculateChordIncrementsDfs( + long weight, BallLarusNode* v, BallLarusEdge* e); + + // Determines the relative direction of two edges. + int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f); +}; + +// --------------------------------------------------------------------------- +// PathProfiler is a module pass which instruments path profiling instructions +// --------------------------------------------------------------------------- +class PathProfiler : public ModulePass { +private: + // Current context for multi threading support. + LLVMContext* Context; + + // Which function are we currently instrumenting + unsigned currentFunctionNumber; + + // The function prototype in the profiling runtime for incrementing a + // single path counter in a hash table. + Constant* llvmIncrementHashFunction; + Constant* llvmDecrementHashFunction; + + // Instruments each function with path profiling. 'main' is instrumented + // with code to save the profile to disk. + bool runOnModule(Module &M); + + // Analyzes the function for Ball-Larus path profiling, and inserts code. + void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M); + + // Creates an increment constant representing incr. + ConstantInt* createIncrementConstant(long incr, int bitsize); + + // Creates an increment constant representing the value in + // edge->getIncrement(). + ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge); + + // Finds the insertion point after pathNumber in block. PathNumber may + // be NULL. + BasicBlock::iterator getInsertionPoint( + BasicBlock* block, Value* pathNumber); + + // Inserts source's pathNumber Value* into target. Target may or may not + // have multiple predecessors, and may or may not have its phiNode + // initialized. 
+ void pushValueIntoNode( + BLInstrumentationNode* source, BLInstrumentationNode* target); + + // Inserts source's pathNumber Value* into the appropriate slot of + // target's phiNode. + void pushValueIntoPHI( + BLInstrumentationNode* target, BLInstrumentationNode* source); + + // The Value* in node, oldVal, is updated with a Value* corresponding to + // oldVal + addition. + void insertNumberIncrement(BLInstrumentationNode* node, Value* addition, + bool atBeginning); + + // Creates a counter increment in the given node. The Value* in node is + // taken as the index into a hash table. + void insertCounterIncrement( + Value* incValue, + BasicBlock::iterator insertPoint, + BLInstrumentationDag* dag, + bool increment = true); + + // A PHINode is created in the node, and its values initialized to -1U. + void preparePHI(BLInstrumentationNode* node); + + // Inserts instrumentation for the given edge + // + // Pre: The edge's source node has pathNumber set if edge is non zero + // path number increment. + // + // Post: Edge's target node has a pathNumber set to the path number Value + // corresponding to the value of the path register after edge's + // execution. + void insertInstrumentationStartingAt( + BLInstrumentationEdge* edge, + BLInstrumentationDag* dag); + + // If this edge is a critical edge, then inserts a node at this edge. + // This edge becomes the first edge, and a new BallLarusEdge is created. + bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag); + + // Inserts instrumentation according to the marked edges in dag. Phony + // edges must be unlinked from the DAG, but accessible from the + // backedges. Dag must have initializations, path number increments, and + // counter increments present. + // + // Counter storage is created here. 
+ void insertInstrumentation( BLInstrumentationDag& dag, Module &M); + +public: + static char ID; // Pass identification, replacement for typeid + PathProfiler() : ModulePass(ID) { + initializePathProfilerPass(*PassRegistry::getPassRegistry()); + } + + virtual const char *getPassName() const { + return "Path Profiler"; + } +}; +} // end anonymous namespace + +// Should we print the dot-graphs +static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden, + cl::desc("Output the path profiling DAG for each function.")); + +// Register the path profiler as a pass +char PathProfiler::ID = 0; +INITIALIZE_PASS(PathProfiler, "insert-path-profiling", + "Insert instrumentation for Ball-Larus path profiling", + false, false) + +ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); } + +namespace llvm { + class PathProfilingFunctionTable {}; + + // Type for global array storing references to hashes or arrays + template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable, + xcompile> { + public: + static StructType *get(LLVMContext& C) { + return( StructType::get( + TypeBuilder<types::i<32>, xcompile>::get(C), // type + TypeBuilder<types::i<32>, xcompile>::get(C), // array size + TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr + NULL)); + } + }; + + typedef TypeBuilder<PathProfilingFunctionTable, true> + ftEntryTypeBuilder; + + // BallLarusEdge << operator overloading + raw_ostream& operator<<(raw_ostream& os, + const BLInstrumentationEdge& edge) + LLVM_ATTRIBUTE_USED; + raw_ostream& operator<<(raw_ostream& os, + const BLInstrumentationEdge& edge) { + os << "[" << edge.getSource()->getName() << " -> " + << edge.getTarget()->getName() << "] init: " + << (edge.isInitialization() ? "yes" : "no") + << " incr:" << edge.getIncrement() << " cinc: " + << (edge.isCounterIncrement() ? "yes" : "no"); + return(os); + } +} + +// Creates a new BLInstrumentationNode from a BasicBlock. 
+BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) : + BallLarusNode(BB), + _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {} + +// Constructor for BLInstrumentationEdge. +BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source, + BLInstrumentationNode* target) + : BallLarusEdge(source, target, 0), + _increment(0), _isInSpanningTree(false), _isInitialization(false), + _isCounterIncrement(false), _hasInstrumentation(false) {} + +// Sets the target node of this edge. Required to split edges. +void BLInstrumentationEdge::setTarget(BallLarusNode* node) { + _target = node; +} + +// Returns whether this edge is in the spanning tree. +bool BLInstrumentationEdge::isInSpanningTree() const { + return(_isInSpanningTree); +} + +// Sets whether this edge is in the spanning tree. +void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) { + _isInSpanningTree = isInSpanningTree; +} + +// Returns whether this edge will be instrumented with a path number +// initialization. +bool BLInstrumentationEdge::isInitialization() const { + return(_isInitialization); +} + +// Sets whether this edge will be instrumented with a path number +// initialization. +void BLInstrumentationEdge::setIsInitialization(bool isInitialization) { + _isInitialization = isInitialization; +} + +// Returns whether this edge will be instrumented with a path counter +// increment. Notice this is incrementing the path counter +// corresponding to the path number register. The path number +// increment is determined by getIncrement(). +bool BLInstrumentationEdge::isCounterIncrement() const { + return(_isCounterIncrement); +} + +// Sets whether this edge will be instrumented with a path counter +// increment. +void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) { + _isCounterIncrement = isCounterIncrement; +} + +// Gets the path number increment that this edge will be instrumented +// with. 
This is distinct from the path counter increment and the +// weight. The counter increment counts the number of executions of +// some path, whereas the path number keeps track of which path number +// the program is on. +long BLInstrumentationEdge::getIncrement() const { + return(_increment); +} + +// Sets the path number increment that this edge will be instrumented +// with. +void BLInstrumentationEdge::setIncrement(long increment) { + _increment = increment; +} + +// True iff the edge has already been instrumented. +bool BLInstrumentationEdge::hasInstrumentation() { + return(_hasInstrumentation); +} + +// Set whether this edge has been instrumented. +void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) { + _hasInstrumentation = hasInstrumentation; +} + +// Returns the successor number of this edge in the source. +unsigned BLInstrumentationEdge::getSuccessorNumber() { + BallLarusNode* sourceNode = getSource(); + BallLarusNode* targetNode = getTarget(); + BasicBlock* source = sourceNode->getBlock(); + BasicBlock* target = targetNode->getBlock(); + + if(source == NULL || target == NULL) + return(0); + + TerminatorInst* terminator = source->getTerminator(); + + unsigned i; + for(i=0; i < terminator->getNumSuccessors(); i++) { + if(terminator->getSuccessor(i) == target) + break; + } + + return(i); +} + +// BLInstrumentationDag constructor initializes a DAG for the given Function. +BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F), + _counterArray(0) { +} + +// Returns the Exit->Root edge. 
This edge is required for creating +// directed cycles in the algorithm for moving instrumentation off of +// the spanning tree +BallLarusEdge* BLInstrumentationDag::getExitRootEdge() { + BLEdgeIterator erEdge = getExit()->succBegin(); + return(*erEdge); +} + +BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () { + BLEdgeVector callEdges; + + for( BLEdgeIterator edge = _edges.begin(), end = _edges.end(); + edge != end; edge++ ) { + if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY ) + callEdges.push_back(*edge); + } + + return callEdges; +} + +// Gets the path counter array +GlobalVariable* BLInstrumentationDag::getCounterArray() { + return _counterArray; +} + +void BLInstrumentationDag::setCounterArray(GlobalVariable* c) { + _counterArray = c; +} + +// Calculates the increment for the chords, thereby removing +// instrumentation from the spanning tree edges. Implementation is based on +// the algorithm in Figure 4 of [Ball94] +void BLInstrumentationDag::calculateChordIncrements() { + calculateChordIncrementsDfs(0, getRoot(), NULL); + + BLInstrumentationEdge* chord; + for(BLEdgeIterator chordEdge = _chordEdges.begin(), + end = _chordEdges.end(); chordEdge != end; chordEdge++) { + chord = (BLInstrumentationEdge*) *chordEdge; + chord->setIncrement(chord->getIncrement() + chord->getWeight()); + } +} + +// Updates the state when an edge has been split +void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge, + BasicBlock* newBlock) { + BallLarusNode* oldTarget = formerEdge->getTarget(); + BallLarusNode* newNode = addNode(newBlock); + formerEdge->setTarget(newNode); + newNode->addPredEdge(formerEdge); + + DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n"); + + oldTarget->removePredEdge(formerEdge); + BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0); + + if( formerEdge->getType() == BallLarusEdge::BACKEDGE || + formerEdge->getType() == BallLarusEdge::SPLITEDGE) { + newEdge->setType(formerEdge->getType()); + 
newEdge->setPhonyRoot(formerEdge->getPhonyRoot()); + newEdge->setPhonyExit(formerEdge->getPhonyExit()); + formerEdge->setType(BallLarusEdge::NORMAL); + formerEdge->setPhonyRoot(NULL); + formerEdge->setPhonyExit(NULL); + } +} + +// Calculates a spanning tree of the DAG ignoring cycles. Whichever +// edges are in the spanning tree will not be instrumented, but this +// implementation does not try to minimize the instrumentation overhead +// by trying to find hot edges. +void BLInstrumentationDag::calculateSpanningTree() { + std::stack<BallLarusNode*> dfsStack; + + for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end(); + nodeIt != end; nodeIt++) { + (*nodeIt)->setColor(BallLarusNode::WHITE); + } + + dfsStack.push(getRoot()); + while(dfsStack.size() > 0) { + BallLarusNode* node = dfsStack.top(); + dfsStack.pop(); + + if(node->getColor() == BallLarusNode::WHITE) + continue; + + BallLarusNode* nextNode; + bool forward = true; + BLEdgeIterator succEnd = node->succEnd(); + + node->setColor(BallLarusNode::WHITE); + // first iterate over successors then predecessors + for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd(); + edge != predEnd; edge++) { + if(edge == succEnd) { + edge = node->predBegin(); + forward = false; + } + + // Ignore split edges + if ((*edge)->getType() == BallLarusEdge::SPLITEDGE) + continue; + + nextNode = forward? 
(*edge)->getTarget(): (*edge)->getSource(); + if(nextNode->getColor() != BallLarusNode::WHITE) { + nextNode->setColor(BallLarusNode::WHITE); + makeEdgeSpanning((BLInstrumentationEdge*)(*edge)); + } + } + } + + for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); + edge != end; edge++) { + BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge); + // safe since createEdge is overriden + if(!instEdge->isInSpanningTree() && (*edge)->getType() + != BallLarusEdge::SPLITEDGE) + _chordEdges.push_back(instEdge); + } +} + +// Pushes initialization further down in order to group the first +// increment and initialization. +void BLInstrumentationDag::pushInitialization() { + BLInstrumentationEdge* exitRootEdge = + (BLInstrumentationEdge*) getExitRootEdge(); + exitRootEdge->setIsInitialization(true); + pushInitializationFromEdge(exitRootEdge); +} + +// Pushes the path counter increments up in order to group the last path +// number increment. +void BLInstrumentationDag::pushCounters() { + BLInstrumentationEdge* exitRootEdge = + (BLInstrumentationEdge*) getExitRootEdge(); + exitRootEdge->setIsCounterIncrement(true); + pushCountersFromEdge(exitRootEdge); +} + +// Removes phony edges from the successor list of the source, and the +// predecessor list of the target. +void BLInstrumentationDag::unlinkPhony() { + BallLarusEdge* edge; + + for(BLEdgeIterator next = _edges.begin(), + end = _edges.end(); next != end; next++) { + edge = (*next); + + if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY || + edge->getType() == BallLarusEdge::SPLITEDGE_PHONY || + edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) { + unlinkEdge(edge); + } + } +} + +// Generate a .dot graph to represent the DAG and pathNumbers +void BLInstrumentationDag::generateDotGraph() { + std::string errorInfo; + std::string functionName = getFunction().getNameStr(); + std::string filename = "pathdag." 
+ functionName + ".dot"; + + DEBUG (dbgs() << "Writing '" << filename << "'...\n"); + raw_fd_ostream dotFile(filename.c_str(), errorInfo); + + if (!errorInfo.empty()) { + errs() << "Error opening '" << filename.c_str() <<"' for writing!"; + errs() << "\n"; + return; + } + + dotFile << "digraph " << functionName << " {\n"; + + for( BLEdgeIterator edge = _edges.begin(), end = _edges.end(); + edge != end; edge++) { + std::string sourceName = (*edge)->getSource()->getName(); + std::string targetName = (*edge)->getTarget()->getName(); + + dotFile << "\t\"" << sourceName.c_str() << "\" -> \"" + << targetName.c_str() << "\" "; + + long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement(); + + switch( (*edge)->getType() ) { + case BallLarusEdge::NORMAL: + dotFile << "[label=" << inc << "] [color=black];\n"; + break; + + case BallLarusEdge::BACKEDGE: + dotFile << "[color=cyan];\n"; + break; + + case BallLarusEdge::BACKEDGE_PHONY: + dotFile << "[label=" << inc + << "] [color=blue];\n"; + break; + + case BallLarusEdge::SPLITEDGE: + dotFile << "[color=violet];\n"; + break; + + case BallLarusEdge::SPLITEDGE_PHONY: + dotFile << "[label=" << inc << "] [color=red];\n"; + break; + + case BallLarusEdge::CALLEDGE_PHONY: + dotFile << "[label=" << inc << "] [color=green];\n"; + break; + } + } + + dotFile << "}\n"; +} + +// Allows subclasses to determine which type of Node is created. +// Override this method to produce subclasses of BallLarusNode if +// necessary. The destructor of BallLarusDag will call free on each pointer +// created. +BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) { + return( new BLInstrumentationNode(BB) ); +} + +// Allows subclasses to determine which type of Edge is created. +// Override this method to produce subclasses of BallLarusEdge if +// necessary. The destructor of BallLarusDag will call free on each pointer +// created. 
+BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source, + BallLarusNode* target, unsigned edgeNumber) { + // One can cast from BallLarusNode to BLInstrumentationNode since createNode + // is overriden to produce BLInstrumentationNode. + return( new BLInstrumentationEdge((BLInstrumentationNode*)source, + (BLInstrumentationNode*)target) ); +} + +// Sets the Value corresponding to the pathNumber register, constant, +// or phinode. Used by the instrumentation code to remember path +// number Values. +Value* BLInstrumentationNode::getStartingPathNumber(){ + return(_startingPathNumber); +} + +// Sets the Value of the pathNumber. Used by the instrumentation code. +void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) { + DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ? + pathNumber->getNameStr() : "unused") << "\n"); + _startingPathNumber = pathNumber; +} + +Value* BLInstrumentationNode::getEndingPathNumber(){ + return(_endingPathNumber); +} + +void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) { + DEBUG(dbgs() << " EPN-" << getName() << " <-- " + << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n"); + _endingPathNumber = pathNumber; +} + +// Get the PHINode Instruction for this node. Used by instrumentation +// code. +PHINode* BLInstrumentationNode::getPathPHI() { + return(_pathPHI); +} + +// Set the PHINode Instruction for this node. Used by instrumentation +// code. +void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) { + _pathPHI = pathPHI; +} + +// Removes the edge from the appropriate predecessor and successor +// lists. +void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) { + if(edge == getExitRootEdge()) + DEBUG(dbgs() << " Removing exit->root edge\n"); + + edge->getSource()->removeSuccEdge(edge); + edge->getTarget()->removePredEdge(edge); +} + +// Makes an edge part of the spanning tree. 
+void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) { + edge->setIsInSpanningTree(true); + _treeEdges.push_back(edge); +} + +// Pushes initialization and calls itself recursively. +void BLInstrumentationDag::pushInitializationFromEdge( + BLInstrumentationEdge* edge) { + BallLarusNode* target; + + target = edge->getTarget(); + if( target->getNumberPredEdges() > 1 || target == getExit() ) { + return; + } else { + for(BLEdgeIterator next = target->succBegin(), + end = target->succEnd(); next != end; next++) { + BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next; + + // Skip split edges + if (intoEdge->getType() == BallLarusEdge::SPLITEDGE) + continue; + + intoEdge->setIncrement(intoEdge->getIncrement() + + edge->getIncrement()); + intoEdge->setIsInitialization(true); + pushInitializationFromEdge(intoEdge); + } + + edge->setIncrement(0); + edge->setIsInitialization(false); + } +} + +// Pushes path counter increments up recursively. +void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) { + BallLarusNode* source; + + source = edge->getSource(); + if(source->getNumberSuccEdges() > 1 || source == getRoot() + || edge->isInitialization()) { + return; + } else { + for(BLEdgeIterator previous = source->predBegin(), + end = source->predEnd(); previous != end; previous++) { + BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous; + + // Skip split edges + if (fromEdge->getType() == BallLarusEdge::SPLITEDGE) + continue; + + fromEdge->setIncrement(fromEdge->getIncrement() + + edge->getIncrement()); + fromEdge->setIsCounterIncrement(true); + pushCountersFromEdge(fromEdge); + } + + edge->setIncrement(0); + edge->setIsCounterIncrement(false); + } +} + +// Depth first algorithm for determining the chord increments. 
+void BLInstrumentationDag::calculateChordIncrementsDfs(long weight, + BallLarusNode* v, BallLarusEdge* e) { + BLInstrumentationEdge* f; + + for(BLEdgeIterator treeEdge = _treeEdges.begin(), + end = _treeEdges.end(); treeEdge != end; treeEdge++) { + f = (BLInstrumentationEdge*) *treeEdge; + if(e != f && v == f->getTarget()) { + calculateChordIncrementsDfs( + calculateChordIncrementsDir(e,f)*(weight) + + f->getWeight(), f->getSource(), f); + } + if(e != f && v == f->getSource()) { + calculateChordIncrementsDfs( + calculateChordIncrementsDir(e,f)*(weight) + + f->getWeight(), f->getTarget(), f); + } + } + + for(BLEdgeIterator chordEdge = _chordEdges.begin(), + end = _chordEdges.end(); chordEdge != end; chordEdge++) { + f = (BLInstrumentationEdge*) *chordEdge; + if(v == f->getSource() || v == f->getTarget()) { + f->setIncrement(f->getIncrement() + + calculateChordIncrementsDir(e,f)*weight); + } + } +} + +// Determines the relative direction of two edges. +int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e, + BallLarusEdge* f) { + if( e == NULL) + return(1); + else if(e->getSource() == f->getTarget() + || e->getTarget() == f->getSource()) + return(1); + + return(-1); +} + +// Creates an increment constant representing incr. +ConstantInt* PathProfiler::createIncrementConstant(long incr, + int bitsize) { + return(ConstantInt::get(IntegerType::get(*Context, 32), incr)); +} + +// Creates an increment constant representing the value in +// edge->getIncrement(). +ConstantInt* PathProfiler::createIncrementConstant( + BLInstrumentationEdge* edge) { + return(createIncrementConstant(edge->getIncrement(), 32)); +} + +// Finds the insertion point after pathNumber in block. PathNumber may +// be NULL. 
+BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value* + pathNumber) { + if(pathNumber == NULL || isa<ConstantInt>(pathNumber) + || (((Instruction*)(pathNumber))->getParent()) != block) { + return(block->getFirstInsertionPt()); + } else { + Instruction* pathNumberInst = (Instruction*) (pathNumber); + BasicBlock::iterator insertPoint; + BasicBlock::iterator end = block->end(); + + for(insertPoint = block->begin(); + insertPoint != end; insertPoint++) { + Instruction* insertInst = &(*insertPoint); + + if(insertInst == pathNumberInst) + return(++insertPoint); + } + + return(insertPoint); + } +} + +// A PHINode is created in the node, and its values initialized to -1U. +void PathProfiler::preparePHI(BLInstrumentationNode* node) { + BasicBlock* block = node->getBlock(); + BasicBlock::iterator insertPoint = block->getFirstInsertionPt(); + pred_iterator PB = pred_begin(node->getBlock()), + PE = pred_end(node->getBlock()); + PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), + std::distance(PB, PE), "pathNumber", + insertPoint ); + node->setPathPHI(phi); + node->setStartingPathNumber(phi); + node->setEndingPathNumber(phi); + + for(pred_iterator predIt = PB; predIt != PE; predIt++) { + BasicBlock* pred = (*predIt); + + if(pred != NULL) + phi->addIncoming(createIncrementConstant((long)-1, 32), pred); + } +} + +// Inserts source's pathNumber Value* into target. Target may or may not +// have multiple predecessors, and may or may not have its phiNode +// initalized. +void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source, + BLInstrumentationNode* target) { + if(target->getBlock() == NULL) + return; + + + if(target->getNumberPredEdges() <= 1) { + assert(target->getStartingPathNumber() == NULL && + "Target already has path number"); + target->setStartingPathNumber(source->getEndingPathNumber()); + target->setEndingPathNumber(source->getEndingPathNumber()); + DEBUG(dbgs() << " Passing path number" + << (source->getEndingPathNumber() ? 
"" : " (null)") + << " value through.\n"); + } else { + if(target->getPathPHI() == NULL) { + DEBUG(dbgs() << " Initializing PHI node for block '" + << target->getName() << "'\n"); + preparePHI(target); + } + pushValueIntoPHI(target, source); + DEBUG(dbgs() << " Passing number value into PHI for block '" + << target->getName() << "'\n"); + } +} + +// Inserts source's pathNumber Value* into the appropriate slot of +// target's phiNode. +void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target, + BLInstrumentationNode* source) { + PHINode* phi = target->getPathPHI(); + assert(phi != NULL && " Tried to push value into node with PHI, but node" + " actually had no PHI."); + phi->removeIncomingValue(source->getBlock(), false); + phi->addIncoming(source->getEndingPathNumber(), source->getBlock()); +} + +// The Value* in node, oldVal, is updated with a Value* correspodning to +// oldVal + addition. +void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node, + Value* addition, bool atBeginning) { + BasicBlock* block = node->getBlock(); + assert(node->getStartingPathNumber() != NULL); + assert(node->getEndingPathNumber() != NULL); + + BasicBlock::iterator insertPoint; + + if( atBeginning ) + insertPoint = block->getFirstInsertionPt(); + else + insertPoint = block->getTerminator(); + + DEBUG(errs() << " Creating addition instruction.\n"); + Value* newpn = BinaryOperator::Create(Instruction::Add, + node->getStartingPathNumber(), + addition, "pathNumber", insertPoint); + + node->setEndingPathNumber(newpn); + + if( atBeginning ) + node->setStartingPathNumber(newpn); +} + +// Creates a counter increment in the given node. The Value* in node is +// taken as the index into an array or hash table. The hash table access +// is a call to the runtime. 
+void PathProfiler::insertCounterIncrement(Value* incValue, + BasicBlock::iterator insertPoint, + BLInstrumentationDag* dag, + bool increment) { + // Counter increment for array + if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) { + // Get pointer to the array location + std::vector<Value*> gepIndices(2); + gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); + gepIndices[1] = incValue; + + GetElementPtrInst* pcPointer = + GetElementPtrInst::Create(dag->getCounterArray(), gepIndices, + "counterInc", insertPoint); + + // Load from the array - call it oldPC + LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint); + + // Test to see whether adding 1 will overflow the counter + ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc, + createIncrementConstant(0xffffffff, 32), + "isMax"); + + // Select increment for the path counter based on overflow + SelectInst* inc = + SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32), + createIncrementConstant(0,32), + "pathInc", insertPoint); + + // newPc = oldPc + inc + BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add, + oldPc, inc, "newPC", + insertPoint); + + // Store back in to the array + new StoreInst(newPc, pcPointer, insertPoint); + } else { // Counter increment for hash + std::vector<Value*> args(2); + args[0] = ConstantInt::get(Type::getInt32Ty(*Context), + currentFunctionNumber); + args[1] = incValue; + + CallInst::Create( + increment ? llvmIncrementHashFunction : llvmDecrementHashFunction, + args, "", insertPoint); + } +} + +// Inserts instrumentation for the given edge +// +// Pre: The edge's source node has pathNumber set if edge is non zero +// path number increment. +// +// Post: Edge's target node has a pathNumber set to the path number Value +// corresponding to the value of the path register after edge's +// execution. +// +// FIXME: This should be reworked so it's not recursive. 
+void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge, + BLInstrumentationDag* dag) { + // Mark the edge as instrumented + edge->setHasInstrumentation(true); + DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n"); + + // create a new node for this edge's instrumentation + splitCritical(edge, dag); + + BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource(); + BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget(); + BLInstrumentationNode* instrumentNode; + BLInstrumentationNode* nextSourceNode; + + bool atBeginning = false; + + // Source node has only 1 successor so any information can be simply + // inserted in to it without splitting + if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) { + DEBUG(dbgs() << " Potential instructions to be placed in: " + << sourceNode->getName() << " (at end)\n"); + instrumentNode = sourceNode; + nextSourceNode = targetNode; // ... since we never made any new nodes + } + + // The target node only has one predecessor, so we can safely insert edge + // instrumentation into it. If there was splitting, it must have been + // successful. + else if( targetNode->getNumberPredEdges() == 1 ) { + DEBUG(dbgs() << " Potential instructions to be placed in: " + << targetNode->getName() << " (at beginning)\n"); + pushValueIntoNode(sourceNode, targetNode); + instrumentNode = targetNode; + nextSourceNode = NULL; // ... otherwise we'll just keep splitting + atBeginning = true; + } + + // Somehow, splitting must have failed. 
+ else { + errs() << "Instrumenting could not split a critical edge.\n"; + DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n"); + return; + } + + // Insert instrumentation if this is a back or split edge + if( edge->getType() == BallLarusEdge::BACKEDGE || + edge->getType() == BallLarusEdge::SPLITEDGE ) { + BLInstrumentationEdge* top = + (BLInstrumentationEdge*) edge->getPhonyRoot(); + BLInstrumentationEdge* bottom = + (BLInstrumentationEdge*) edge->getPhonyExit(); + + assert( top->isInitialization() && " Top phony edge did not" + " contain a path number initialization."); + assert( bottom->isCounterIncrement() && " Bottom phony edge" + " did not contain a path counter increment."); + + // split edge has yet to be initialized + if( !instrumentNode->getEndingPathNumber() ) { + instrumentNode->setStartingPathNumber(createIncrementConstant(0,32)); + instrumentNode->setEndingPathNumber(createIncrementConstant(0,32)); + } + + BasicBlock::iterator insertPoint = atBeginning ? + instrumentNode->getBlock()->getFirstInsertionPt() : + instrumentNode->getBlock()->getTerminator(); + + // add information from the bottom edge, if it exists + if( bottom->getIncrement() ) { + Value* newpn = + BinaryOperator::Create(Instruction::Add, + instrumentNode->getStartingPathNumber(), + createIncrementConstant(bottom), + "pathNumber", insertPoint); + instrumentNode->setEndingPathNumber(newpn); + } + + insertCounterIncrement(instrumentNode->getEndingPathNumber(), + insertPoint, dag); + + if( atBeginning ) + instrumentNode->setStartingPathNumber(createIncrementConstant(top)); + + instrumentNode->setEndingPathNumber(createIncrementConstant(top)); + + // Check for path counter increments + if( top->isCounterIncrement() ) { + insertCounterIncrement(instrumentNode->getEndingPathNumber(), + instrumentNode->getBlock()->getTerminator(),dag); + instrumentNode->setEndingPathNumber(0); + } + } + + // Insert instrumentation if this is a normal edge + else { + BasicBlock::iterator insertPoint = 
atBeginning ? + instrumentNode->getBlock()->getFirstInsertionPt() : + instrumentNode->getBlock()->getTerminator(); + + if( edge->isInitialization() ) { // initialize path number + instrumentNode->setEndingPathNumber(createIncrementConstant(edge)); + } else if( edge->getIncrement() ) {// increment path number + Value* newpn = + BinaryOperator::Create(Instruction::Add, + instrumentNode->getStartingPathNumber(), + createIncrementConstant(edge), + "pathNumber", insertPoint); + instrumentNode->setEndingPathNumber(newpn); + + if( atBeginning ) + instrumentNode->setStartingPathNumber(newpn); + } + + // Check for path counter increments + if( edge->isCounterIncrement() ) { + insertCounterIncrement(instrumentNode->getEndingPathNumber(), + insertPoint, dag); + instrumentNode->setEndingPathNumber(0); + } + } + + // Push it along + if (nextSourceNode && instrumentNode->getEndingPathNumber()) + pushValueIntoNode(instrumentNode, nextSourceNode); + + // Add all the successors + for( BLEdgeIterator next = targetNode->succBegin(), + end = targetNode->succEnd(); next != end; next++ ) { + // So long as it is un-instrumented, add it to the list + if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() ) + insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag); + else + DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next) + << " already instrumented.\n"); + } +} + +// Inserts instrumentation according to the marked edges in dag. Phony edges +// must be unlinked from the DAG, but accessible from the backedges. Dag +// must have initializations, path number increments, and counter increments +// present. +// +// Counter storage is created here. 
+void PathProfiler::insertInstrumentation( + BLInstrumentationDag& dag, Module &M) { + + BLInstrumentationEdge* exitRootEdge = + (BLInstrumentationEdge*) dag.getExitRootEdge(); + insertInstrumentationStartingAt(exitRootEdge, &dag); + + // Iterate through each call edge and apply the appropriate hash increment + // and decrement functions + BLEdgeVector callEdges = dag.getCallPhonyEdges(); + for( BLEdgeIterator edge = callEdges.begin(), + end = callEdges.end(); edge != end; edge++ ) { + BLInstrumentationNode* node = + (BLInstrumentationNode*)(*edge)->getSource(); + BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt(); + + // Find the first function call + while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call ) + insertPoint++; + + DEBUG(dbgs() << "\nInstrumenting method call block '" + << node->getBlock()->getNameStr() << "'\n"); + DEBUG(dbgs() << " Path number initialized: " + << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n"); + + Value* newpn; + if( node->getStartingPathNumber() ) { + long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement(); + if ( inc ) + newpn = BinaryOperator::Create(Instruction::Add, + node->getStartingPathNumber(), + createIncrementConstant(inc,32), + "pathNumber", insertPoint); + else + newpn = node->getStartingPathNumber(); + } else { + newpn = (Value*)createIncrementConstant( + ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32); + } + + insertCounterIncrement(newpn, insertPoint, &dag); + insertCounterIncrement(newpn, node->getBlock()->getTerminator(), + &dag, false); + } +} + +// Entry point of the module +void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit, + Function &F, Module &M) { + // Build DAG from CFG + BLInstrumentationDag dag = BLInstrumentationDag(F); + dag.init(); + + // give each path a unique integer value + dag.calculatePathNumbers(); + + // modify path increments to increase the efficiency + // of instrumentation + dag.calculateSpanningTree(); + 
dag.calculateChordIncrements(); + dag.pushInitialization(); + dag.pushCounters(); + dag.unlinkPhony(); + + // potentially generate .dot graph for the dag + if (DotPathDag) + dag.generateDotGraph (); + + // Should we store the information in an array or hash + if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) { + Type* t = ArrayType::get(Type::getInt32Ty(*Context), + dag.getNumberOfPaths()); + + dag.setCounterArray(new GlobalVariable(M, t, false, + GlobalValue::InternalLinkage, + Constant::getNullValue(t), "")); + } + + insertInstrumentation(dag, M); + + // Add to global function reference table + unsigned type; + Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context); + + if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) + type = ProfilingArray; + else + type = ProfilingHash; + + std::vector<Constant*> entryArray(3); + entryArray[0] = createIncrementConstant(type,32); + entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32); + entryArray[2] = dag.getCounterArray() ? + ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) : + Constant::getNullValue(voidPtr); + + StructType* at = ftEntryTypeBuilder::get(*Context); + ConstantStruct* functionEntry = + (ConstantStruct*)ConstantStruct::get(at, entryArray); + ftInit.push_back(functionEntry); +} + +// Output the bitcode if we want to observe instrumentation changess +#define PRINT_MODULE dbgs() << \ + "\n\n============= MODULE BEGIN ===============\n" << M << \ + "\n============== MODULE END ================\n" + +bool PathProfiler::runOnModule(Module &M) { + Context = &M.getContext(); + + DEBUG(dbgs() + << "****************************************\n" + << "****************************************\n" + << "** **\n" + << "** PATH PROFILING INSTRUMENTATION **\n" + << "** **\n" + << "****************************************\n" + << "****************************************\n"); + + // No main, no instrumentation! + Function *Main = M.getFunction("main"); + + // Using fortran? ... 
this kind of works + if (!Main) + Main = M.getFunction("MAIN__"); + + if (!Main) { + errs() << "WARNING: cannot insert path profiling into a module" + << " with no main function!\n"; + return false; + } + + llvmIncrementHashFunction = M.getOrInsertFunction( + "llvm_increment_path_count", + Type::getVoidTy(*Context), // return type + Type::getInt32Ty(*Context), // function number + Type::getInt32Ty(*Context), // path number + NULL ); + + llvmDecrementHashFunction = M.getOrInsertFunction( + "llvm_decrement_path_count", + Type::getVoidTy(*Context), // return type + Type::getInt32Ty(*Context), // function number + Type::getInt32Ty(*Context), // path number + NULL ); + + std::vector<Constant*> ftInit; + unsigned functionNumber = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) { + if (F->isDeclaration()) + continue; + + DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n"); + functionNumber++; + + // set function number + currentFunctionNumber = functionNumber; + runOnFunction(ftInit, *F, M); + } + + Type *t = ftEntryTypeBuilder::get(*Context); + ArrayType* ftArrayType = ArrayType::get(t, ftInit.size()); + Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit); + + DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n"); + + GlobalVariable* functionTable = + new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage, + ftInitConstant, "functionPathTable"); + Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0); + InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable, + PointerType::getUnqual(eltType)); + + DEBUG(PRINT_MODULE); + + return true; +} + +// If this edge is a critical edge, then inserts a node at this edge. +// This edge becomes the first edge, and a new BallLarusEdge is created. 
+// Returns true if the edge was split +bool PathProfiler::splitCritical(BLInstrumentationEdge* edge, + BLInstrumentationDag* dag) { + unsigned succNum = edge->getSuccessorNumber(); + BallLarusNode* sourceNode = edge->getSource(); + BallLarusNode* targetNode = edge->getTarget(); + BasicBlock* sourceBlock = sourceNode->getBlock(); + BasicBlock* targetBlock = targetNode->getBlock(); + + if(sourceBlock == NULL || targetBlock == NULL + || sourceNode->getNumberSuccEdges() <= 1 + || targetNode->getNumberPredEdges() == 1 ) { + return(false); + } + + TerminatorInst* terminator = sourceBlock->getTerminator(); + + if( SplitCriticalEdge(terminator, succNum, this, false)) { + BasicBlock* newBlock = terminator->getSuccessor(succNum); + dag->splitUpdate(edge, newBlock); + return(true); + } else + return(false); +}
diff --git a/src/LLVM/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/src/LLVM/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 1a30e9b..de57cd1 100644 --- a/src/LLVM/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/src/LLVM/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -22,12 +22,13 @@ #include "llvm/Module.h" void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, - GlobalValue *Array) { + GlobalValue *Array, + PointerType *arrayType) { LLVMContext &Context = MainFn->getContext(); - const Type *ArgVTy = + Type *ArgVTy = PointerType::getUnqual(Type::getInt8PtrTy(Context)); - const PointerType *UIntPtr = - Type::getInt32PtrTy(Context); + PointerType *UIntPtr = arrayType ? arrayType : + Type::getInt32PtrTy(Context); Module &M = *MainFn->getParent(); Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context), Type::getInt32Ty(Context), @@ -50,8 +51,7 @@ Constant::getNullValue(Type::getInt32Ty(Context))); unsigned NumElements = 0; if (Array) { - Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0], - GEPIndices.size()); + Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices); NumElements = cast<ArrayType>(Array->getType()->getElementType())->getNumElements(); } else { @@ -61,8 +61,7 @@ } Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements); - CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), - "newargc", InsertPos); + CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos); // If argc or argv are not available in main, just pass null values in. 
Function::arg_iterator AI; @@ -71,9 +70,9 @@ case 2: AI = MainFn->arg_begin(); ++AI; if (AI->getType() != ArgVTy) { - Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, + Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, false); - InitCall->setArgOperand(1, + InitCall->setArgOperand(1, CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall)); } else { InitCall->setArgOperand(1, AI); @@ -93,7 +92,7 @@ } opcode = CastInst::getCastOpcode(AI, true, Type::getInt32Ty(Context), true); - InitCall->setArgOperand(0, + InitCall->setArgOperand(0, CastInst::Create(opcode, AI, Type::getInt32Ty(Context), "argc.cast", InitCall)); } else { @@ -106,9 +105,10 @@ } void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, - GlobalValue *CounterArray) { + GlobalValue *CounterArray, bool beginning) { // Insert the increment after any alloca or PHI instructions... - BasicBlock::iterator InsertPos = BB->getFirstNonPHI(); + BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() : + BB->getTerminator(); while (isa<AllocaInst>(InsertPos)) ++InsertPos; @@ -118,9 +118,8 @@ std::vector<Constant*> Indices(2); Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); - Constant *ElementPtr = - ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], - Indices.size()); + Constant *ElementPtr = + ConstantExpr::getGetElementPtr(CounterArray, Indices); // Load, increment and store the value back. Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); @@ -129,3 +128,42 @@ "NewFuncCounter", InsertPos); new StoreInst(NewVal, ElementPtr, InsertPos); } + +void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { + // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those + // types. 
+ Type *GlobalDtorElems[2] = { + Type::getInt32Ty(Mod->getContext()), + FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo() + }; + StructType *GlobalDtorElemTy = + StructType::get(Mod->getContext(), GlobalDtorElems, false); + + // Construct the new element we'll be adding. + Constant *Elem[2] = { + ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535), + ConstantExpr::getBitCast(Callee, GlobalDtorElems[1]) + }; + + // If llvm.global_dtors exists, make a copy of the things in its list and + // delete it, to replace it with one that has a larger array type. + std::vector<Constant *> dtors; + if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) { + if (ConstantArray *InitList = + dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) { + for (unsigned i = 0, e = InitList->getType()->getNumElements(); + i != e; ++i) + dtors.push_back(cast<Constant>(InitList->getOperand(i))); + } + GlobalDtors->eraseFromParent(); + } + + // Build up llvm.global_dtors with our new item in it. + GlobalVariable *GlobalDtors = new GlobalVariable( + *Mod, ArrayType::get(GlobalDtorElemTy, 1), false, + GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors"); + + dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem)); + GlobalDtors->setInitializer(ConstantArray::get( + cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors)); +}
diff --git a/src/LLVM/lib/Transforms/Instrumentation/ProfilingUtils.h b/src/LLVM/lib/Transforms/Instrumentation/ProfilingUtils.h index 94efffe..09b2217 100644 --- a/src/LLVM/lib/Transforms/Instrumentation/ProfilingUtils.h +++ b/src/LLVM/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -18,14 +18,19 @@ #define PROFILINGUTILS_H namespace llvm { + class BasicBlock; class Function; class GlobalValue; - class BasicBlock; + class Module; + class PointerType; void InsertProfilingInitCall(Function *MainFn, const char *FnName, - GlobalValue *Arr = 0); + GlobalValue *Arr = 0, + PointerType *arrayType = 0); void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, - GlobalValue *CounterArray); + GlobalValue *CounterArray, + bool beginning = true); + void InsertProfilingShutdownCall(Function *Callee, Module *Mod); } #endif
diff --git a/src/LLVM/lib/Transforms/PACKAGE.vcxproj b/src/LLVM/lib/Transforms/PACKAGE.vcxproj new file mode 100644 index 0000000..089573d --- /dev/null +++ b/src/LLVM/lib/Transforms/PACKAGE.vcxproj
@@ -0,0 +1,277 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>PACKAGE</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Transforms;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + 
</PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Transforms;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Transforms;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + 
</ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Transforms;..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../.. -B../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/PACKAGE.vcxproj.filters b/src/LLVM/lib/Transforms/PACKAGE.vcxproj.filters new file mode 100644 index 0000000..a570359 --- /dev/null +++ b/src/LLVM/lib/Transforms/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Scalar/ADCE.cpp b/src/LLVM/lib/Transforms/Scalar/ADCE.cpp index 12bb08d..ba214d1 100644 --- a/src/LLVM/lib/Transforms/Scalar/ADCE.cpp +++ b/src/LLVM/lib/Transforms/Scalar/ADCE.cpp
@@ -33,7 +33,9 @@ namespace { struct ADCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ADCE() : FunctionPass(ID) {} + ADCE() : FunctionPass(ID) { + initializeADCEPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function& F); @@ -45,7 +47,7 @@ } char ADCE::ID = 0; -INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false); +INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false) bool ADCE::runOnFunction(Function& F) { SmallPtrSet<Instruction*, 128> alive; @@ -54,7 +56,8 @@ // Collect the set of "root" instructions that are known live. for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (isa<TerminatorInst>(I.getInstructionIterator()) || - ISA_DEBUG_INFO_INTRINSIC(I.getInstructionIterator()) || + isa<DbgInfoIntrinsic>(I.getInstructionIterator()) || + isa<LandingPadInst>(I.getInstructionIterator()) || I->mayHaveSideEffects()) { alive.insert(I.getInstructionIterator()); worklist.push_back(I.getInstructionIterator()); @@ -63,7 +66,6 @@ // Propagate liveness backwards to operands. while (!worklist.empty()) { Instruction* curr = worklist.pop_back_val(); - for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end(); OI != OE; ++OI) if (Instruction* Inst = dyn_cast<Instruction>(OI))
diff --git a/src/LLVM/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/src/LLVM/lib/Transforms/Scalar/BasicBlockPlacement.cpp new file mode 100644 index 0000000..cee5502 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -0,0 +1,152 @@ +//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a very simple profile guided basic block placement +// algorithm. The idea is to put frequently executed blocks together at the +// start of the function, and hopefully increase the number of fall-through +// conditional branches. If there is no profile information for a particular +// function, this pass basically orders blocks in depth-first order +// +// The algorithm implemented here is basically "Algo1" from "Profile Guided Code +// Positioning" by Pettis and Hansen, except that it uses basic block counts +// instead of edge counts. This should be improved in many ways, but is very +// simple for now. +// +// Basically we "place" the entry block, then loop over all successors in a DFO, +// placing the most frequently executed successor until we run out of blocks. I +// told you this was _extremely_ simplistic. :) This is also much slower than it +// could be. When it becomes important, this pass will be rewritten to use a +// better algorithm, and then we can worry about efficiency. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "block-placement" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Scalar.h" +#include <set> +using namespace llvm; + +STATISTIC(NumMoved, "Number of basic blocks moved"); + +namespace { + struct BlockPlacement : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BlockPlacement() : FunctionPass(ID) { + initializeBlockPlacementPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<ProfileInfo>(); + //AU.addPreserved<ProfileInfo>(); // Does this work? + } + private: + /// PI - The profile information that is guiding us. + /// + ProfileInfo *PI; + + /// NumMovedBlocks - Every time we move a block, increment this counter. + /// + unsigned NumMovedBlocks; + + /// PlacedBlocks - Every time we place a block, remember it so we don't get + /// into infinite loops. + std::set<BasicBlock*> PlacedBlocks; + + /// InsertPos - This an iterator to the next place we want to insert a + /// block. + Function::iterator InsertPos; + + /// PlaceBlocks - Recursively place the specified blocks and any unplaced + /// successors. 
+ void PlaceBlocks(BasicBlock *BB); + }; +} + +char BlockPlacement::ID = 0; +INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement", + "Profile Guided Basic Block Placement", false, false) +INITIALIZE_AG_DEPENDENCY(ProfileInfo) +INITIALIZE_PASS_END(BlockPlacement, "block-placement", + "Profile Guided Basic Block Placement", false, false) + +FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); } + +bool BlockPlacement::runOnFunction(Function &F) { + PI = &getAnalysis<ProfileInfo>(); + + NumMovedBlocks = 0; + InsertPos = F.begin(); + + // Recursively place all blocks. + PlaceBlocks(F.begin()); + + PlacedBlocks.clear(); + NumMoved += NumMovedBlocks; + return NumMovedBlocks != 0; +} + + +/// PlaceBlocks - Recursively place the specified blocks and any unplaced +/// successors. +void BlockPlacement::PlaceBlocks(BasicBlock *BB) { + assert(!PlacedBlocks.count(BB) && "Already placed this block!"); + PlacedBlocks.insert(BB); + + // Place the specified block. + if (&*InsertPos != BB) { + // Use splice to move the block into the right place. This avoids having to + // remove the block from the function then readd it, which causes a bunch of + // symbol table traffic that is entirely pointless. + Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList(); + Blocks.splice(InsertPos, Blocks, BB); + + ++NumMovedBlocks; + } else { + // This block is already in the right place, we don't have to do anything. + ++InsertPos; + } + + // Keep placing successors until we run out of ones to place. Note that this + // loop is very inefficient (N^2) for blocks with many successors, like switch + // statements. FIXME! + while (1) { + // Okay, now place any unplaced successors. + succ_iterator SI = succ_begin(BB), E = succ_end(BB); + + // Scan for the first unplaced successor. + for (; SI != E && PlacedBlocks.count(*SI); ++SI) + /*empty*/; + if (SI == E) return; // No more successors to place. 
+ + double MaxExecutionCount = PI->getExecutionCount(*SI); + BasicBlock *MaxSuccessor = *SI; + + // Scan for more frequently executed successors + for (; SI != E; ++SI) + if (!PlacedBlocks.count(*SI)) { + double Count = PI->getExecutionCount(*SI); + if (Count > MaxExecutionCount || + // Prefer to not disturb the code. + (Count == MaxExecutionCount && *SI == &*InsertPos)) { + MaxExecutionCount = Count; + MaxSuccessor = *SI; + } + } + + // Now that we picked the maximally executed successor, place it. + PlaceBlocks(MaxSuccessor); + } +}
diff --git a/src/LLVM/lib/Transforms/Scalar/CodeGenPrepare.cpp b/src/LLVM/lib/Transforms/Scalar/CodeGenPrepare.cpp index 4be02ec..f8f18b2 100644 --- a/src/LLVM/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/src/LLVM/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -22,6 +22,8 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -31,92 +33,137 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/ValueHandle.h" using namespace llvm; using namespace llvm::PatternMatch; +STATISTIC(NumBlocksElim, "Number of blocks eliminated"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); +STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " + "sunken Cmps"); +STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " + "of sunken Casts"); +STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " + "computations were sunk"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); +STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); + +static cl::opt<bool> DisableBranchOpts( + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); + namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. 
const TargetLowering *TLI; + DominatorTree *DT; ProfileInfo *PFI; + + /// CurInstIterator - As we scan instructions optimizing them, this is the + /// next instruction to optimize. Xforms that can invalidate this should + /// update it. + BasicBlock::iterator CurInstIterator; - /// BackEdges - Keep a set of all the loop back edges. - /// - SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges; + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. + DenseMap<Value*, Value*> SunkAddrs; + + /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to + /// be updated. + bool ModifiedDT; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) - : FunctionPass(ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) { + initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); AU.addPreserved<ProfileInfo>(); } - virtual void releaseMemory() { - BackEdges.clear(); - } - private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void EliminateMostlyEmptyBlock(BasicBlock *BB); bool OptimizeBlock(BasicBlock &BB); - bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy, - DenseMap<Value*,Value*> &SunkAddrs); - bool OptimizeInlineAsmInst(Instruction *I, CallSite CS, - DenseMap<Value*,Value*> &SunkAddrs); + bool OptimizeInst(Instruction *I); + bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); + bool OptimizeInlineAsmInst(CallInst *CS); bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); - void findLoopBackEdges(const Function &F); + bool 
DupRetToEnableTailCallOpts(ReturnInst *RI); + bool PlaceDbgValues(Function &F); }; } char CodeGenPrepare::ID = 0; INITIALIZE_PASS(CodeGenPrepare, "codegenprepare", - "Optimize for code generation", false, false); + "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); } -/// findLoopBackEdges - Do a DFS walk to find loop back edges. -/// -void CodeGenPrepare::findLoopBackEdges(const Function &F) { - SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; - FindFunctionBackedges(F, Edges); - - BackEdges.insert(Edges.begin(), Edges.end()); -} - - bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + ModifiedDT = false; + DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); + // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); - // Now find loop back edges. - findLoopBackEdges(F); + // llvm.dbg.value is far away from the value then iSel may not be able + // handle it properly. iSel will drop llvm.dbg.value if it can not + // find a node corresponding to the value. 
+ EverMadeChange |= PlaceDbgValues(F); bool MadeChange = true; while (MadeChange) { MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; MadeChange |= OptimizeBlock(*BB); + } EverMadeChange |= MadeChange; } + + SunkAddrs.clear(); + + if (!DisableBranchOpts) { + MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + MadeChange |= ConstantFoldTerminator(BB, true); + + if (MadeChange) + ModifiedDT = true; + EverMadeChange |= MadeChange; + } + + if (ModifiedDT && DT) + DT->DT->recalculate(F); + return EverMadeChange; } @@ -141,12 +188,12 @@ BasicBlock::iterator BBI = BI; if (BBI != BB->begin()) { --BBI; - while (ISA_DEBUG_INFO_INTRINSIC(BBI)) { + while (isa<DbgInfoIntrinsic>(BBI)) { if (BBI == BB->begin()) break; --BBI; } - if (!ISA_DEBUG_INFO_INTRINSIC(BBI) && !isa<PHINode>(BBI)) + if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) continue; } @@ -291,114 +338,23 @@ // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); + if (DT && !ModifiedDT) { + BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); + BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); + BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); + DT->changeImmediateDominator(DestBB, NewIDom); + DT->eraseNode(BB); + } if (PFI) { PFI->replaceAllUses(BB, DestBB); PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); + ++NumBlocksElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } -/// FindReusablePredBB - Check all of the predecessors of the block DestPHI -/// lives in to see if there is a block that we can reuse as a critical edge -/// from TIBB. 
-static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { - BasicBlock *Dest = DestPHI->getParent(); - - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; - - /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. - unsigned TIBBEntryNo = 0; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { - BasicBlock *Pred = DestPHI->getIncomingBlock(pi); - // To be usable, the pred has to end with an uncond branch to the dest. - BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (ISA_DEBUG_INFO_INTRINSIC(I)) - I++; - if (&*I != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &Dest->getParent()->getEntryBlock()) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. - PHINode *PN; - unsigned PHINo = 0; - unsigned PredEntryNo = pi; - - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) { - if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) - TIBBEntryNo = PN->getBasicBlockIndex(TIBB); - TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); - } - - // If the PHI entry doesn't work, we can't use this pred. 
- if (PN->getIncomingBlock(PredEntryNo) != Pred) - PredEntryNo = PN->getBasicBlockIndex(Pred); - - if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. - if (FoundMatch) - return Pred; - } - return 0; -} - - -/// SplitEdgeNicely - Split the critical edge from TI to its specified -/// successor if it will improve codegen. We only do this if the successor has -/// phi nodes (otherwise critical edges are ok). If there is already another -/// predecessor of the succ that is empty (and thus has no phi nodes), use it -/// instead of introducing a new block. -static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, - SmallSet<std::pair<const BasicBlock*, - const BasicBlock*>, 8> &BackEdges, - Pass *P) { - BasicBlock *TIBB = TI->getParent(); - BasicBlock *Dest = TI->getSuccessor(SuccNum); - assert(isa<PHINode>(Dest->begin()) && - "This should only be called if Dest has a PHI!"); - PHINode *DestPHI = cast<PHINode>(Dest->begin()); - - // As a hack, never split backedges of loops. Even though the copy for any - // PHIs inserted on the backedge would be dead for exits from the loop, we - // assume that the cost of *splitting* the backedge would be too high. - if (BackEdges.count(std::make_pair(TIBB, Dest))) - return; - - if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { - ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PFI) - PFI->splitEdge(TIBB, Dest, ReuseBB); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, ReuseBB); - return; - } - - SplitCriticalEdge(TI, SuccNum, P, true); -} - - /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), /// sink it into user blocks to reduce the number of virtual @@ -422,9 +378,11 @@ // If these values will be promoted, find out what they will be promoted // to. 
This helps us consider truncates on PPC as noop copies when they // are. - if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote) + if (TLI.getTypeAction(CI->getContext(), SrcVT) == + TargetLowering::TypePromoteInteger) SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); - if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote) + if (TLI.getTypeAction(CI->getContext(), DstVT) == + TargetLowering::TypePromoteInteger) DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); // If, after promotion, these are the same types, this is a noop copy. @@ -459,16 +417,16 @@ CastInst *&InsertedCast = InsertedCasts[UserBB]; if (!InsertedCast) { - BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); - + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); InsertedCast = - CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), + CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", InsertPt); MadeChange = true; } // Replace a use of the cast with a use of the new cast. TheUse = InsertedCast; + ++NumCastUses; } // If we removed all uses, nuke the cast. @@ -515,17 +473,17 @@ CmpInst *&InsertedCmp = InsertedCmps[UserBB]; if (!InsertedCmp) { - BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); - + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); InsertedCmp = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), - CI->getOperand(1), InsertPt); + CI->getOperand(1), "", InsertPt); MadeChange = true; } // Replace a use of the cmp with a use of the new cmp. TheUse = InsertedCmp; + ++NumCmpUses; } // If we removed all uses, nuke the cmp. 
@@ -535,20 +493,204 @@ return MadeChange; } +namespace { +class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls { +protected: + void replaceCall(Value *With) { + CI->replaceAllUsesWith(With); + CI->eraseFromParent(); + } + bool isFoldable(unsigned SizeCIOp, unsigned, bool) const { + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) + return SizeCI->isAllOnesValue(); + return false; + } +}; +} // end anonymous namespace + bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { + BasicBlock *BB = CI->getParent(); + + // Lower inline assembly if we can. + // If we found an inline asm expession, and if the target knows how to + // lower it to normal LLVM code, do so now. + if (TLI && isa<InlineAsm>(CI->getCalledValue())) { + if (TLI->ExpandInlineAsm(CI)) { + // Avoid invalidating the iterator. + CurInstIterator = BB->begin(); + // Avoid processing instructions out of order, which could cause + // reuse before a value is defined. + SunkAddrs.clear(); + return true; + } + // Sink address computing for memory operands into the block. + if (OptimizeInlineAsmInst(CI)) + return true; + } + // Lower all uses of llvm.objectsize.* IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II && II->getIntrinsicID() == Intrinsic::objectsize) { bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); - const Type *ReturnTy = CI->getType(); + Type *ReturnTy = CI->getType(); Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); - CI->replaceAllUsesWith(RetVal); - CI->eraseFromParent(); + + // Substituting this can cause recursive simplifications, which can + // invalidate our iterator. Use a WeakVH to hold onto it in case this + // happens. + WeakVH IterHandle(CurInstIterator); + + ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, + ModifiedDT ? 0 : DT); + + // If the iterator instruction was recursively deleted, start over at the + // start of the block. 
+ if (IterHandle != CurInstIterator) { + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } return true; } + + // From here on out we're working with named functions. + if (CI->getCalledFunction() == 0) return false; + + // We'll need TargetData from here on out. + const TargetData *TD = TLI ? TLI->getTargetData() : 0; + if (!TD) return false; - return false; + // Lower all default uses of _chk calls. This is very similar + // to what InstCombineCalls does, but here we are only lowering calls + // that have the default "don't know" as the objectsize. Anything else + // should be left alone. + CodeGenPrepareFortifiedLibCalls Simplifier; + return Simplifier.fold(CI, TD); } + +/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return +/// instructions to the predecessor to enable tail call optimizations. The +/// case it is currently looking for is: +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// br label %return +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// br label %return +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// br label %return +/// return: +/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +/// ret i32 %retval +/// +/// => +/// +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// ret i32 %tmp0 +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// ret i32 %tmp1 +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// ret i32 %tmp2 +/// +bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { + if (!TLI) + return false; + + Value *V = RI->getReturnValue(); + PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; + if (V && !PN) + return false; + + BasicBlock *BB = RI->getParent(); + if (PN && PN->getParent() != BB) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + // See llvm::isInTailCallPosition(). 
+ const Function *F = BB->getParent(); + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Make sure there are no instructions between the PHI and return, or that the + // return is the first instruction in the block. + if (PN) { + BasicBlock::iterator BI = BB->begin(); + do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI != RI) + return false; + } else { + BasicBlock::iterator BI = BB->begin(); + while (isa<DbgInfoIntrinsic>(BI)) ++BI; + if (&*BI != RI) + return false; + } + + /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail + /// call. + SmallVector<CallInst*, 4> TailCalls; + if (PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); + // Make sure the phi value is indeed produced by the tail call. + if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } else { + SmallPtrSet<BasicBlock*, 4> VisitedBBs; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (!VisitedBBs.insert(*PI)) + continue; + + BasicBlock::InstListType &InstList = (*PI)->getInstList(); + BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); + BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); + do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); + if (RI == RE) + continue; + + CallInst *CI = dyn_cast<CallInst>(&*RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } + + bool Changed = false; + for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { + CallInst *CI = TailCalls[i]; + CallSite CS(CI); + + // Conservatively require the attributes of the call to match those of the + // return. Ignore noalias because it doesn't affect the call sequence. 
+ unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + continue; + + // Make sure the call instruction is followed by an unconditional branch to + // the return block. + BasicBlock *CallBB = CI->getParent(); + BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); + if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) + continue; + + // Duplicate the return into CallBB. + (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + ModifiedDT = Changed = true; + ++NumRetsDup; + } + + // If we eliminated all predecessors of the block, delete the block now. + if (Changed && pred_begin(BB) == pred_end(BB)) + BB->eraseFromParent(); + + return Changed; +} + //===----------------------------------------------------------------------===// // Memory Optimization //===----------------------------------------------------------------------===// @@ -571,13 +713,83 @@ /// This method is used to optimize both load/store and inline asms with memory /// operands. bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - const Type *AccessTy, - DenseMap<Value*,Value*> &SunkAddrs) { - // Figure out what addressing mode will be built up for this operation. + Type *AccessTy) { + Value *Repl = Addr; + + // Try to collapse single-value PHI nodes. This is necessary to undo + // unprofitable PRE transformations. + SmallVector<Value*, 8> worklist; + SmallPtrSet<Value*, 16> Visited; + worklist.push_back(Addr); + + // Use a worklist to iteratively look through PHI nodes, and ensure that + // the addressing mode obtained from the non-PHI roots of the graph + // are equivalent. 
+ Value *Consensus = 0; + unsigned NumUsesConsensus = 0; + bool IsNumUsesConsensusValid = false; SmallVector<Instruction*, 16> AddrModeInsts; - ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst, - AddrModeInsts, *TLI); + ExtAddrMode AddrMode; + while (!worklist.empty()) { + Value *V = worklist.back(); + worklist.pop_back(); + + // Break use-def graph loops. + if (!Visited.insert(V)) { + Consensus = 0; + break; + } + + // For a PHI node, push all of its incoming values. + if (PHINode *P = dyn_cast<PHINode>(V)) { + for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) + worklist.push_back(P->getIncomingValue(i)); + continue; + } + + // For non-PHIs, determine the addressing mode being computed. + SmallVector<Instruction*, 16> NewAddrModeInsts; + ExtAddrMode NewAddrMode = + AddressingModeMatcher::Match(V, AccessTy, MemoryInst, + NewAddrModeInsts, *TLI); + // This check is broken into two cases with very similar code to avoid using + // getNumUses() as much as possible. Some values have a lot of uses, so + // calling getNumUses() unconditionally caused a significant compile-time + // regression. + if (!Consensus) { + Consensus = V; + AddrMode = NewAddrMode; + AddrModeInsts = NewAddrModeInsts; + continue; + } else if (NewAddrMode == AddrMode) { + if (!IsNumUsesConsensusValid) { + NumUsesConsensus = Consensus->getNumUses(); + IsNumUsesConsensusValid = true; + } + + // Ensure that the obtained addressing mode is equivalent to that obtained + // for all other roots of the PHI traversal. Also, when choosing one + // such root as representative, select the one with the most uses in order + // to keep the cost modeling heuristics in AddressingModeMatcher + // applicable. 
+ unsigned NumUses = V->getNumUses(); + if (NumUses > NumUsesConsensus) { + Consensus = V; + NumUsesConsensus = NumUses; + AddrModeInsts = NewAddrModeInsts; + } + continue; + } + + Consensus = 0; + break; + } + + // If the addressing mode couldn't be determined, or if multiple different + // ones were determined, bail out now. + if (!Consensus) return false; + // Check to see if any of the instructions supersumed by this addr mode are // non-local to I's BB. bool AnyNonLocal = false; @@ -597,7 +809,7 @@ // Insert this computation right after this user. Since our caller is // scanning from the top of the BB to the bottom, reuse of the expr are // guaranteed to happen later. - BasicBlock::iterator InsertPt = MemoryInst; + IRBuilder<> Builder(MemoryInst); // Now that we determined the addressing expression we want to use and know // that we have to sink it into this block. Check to see if we have already @@ -608,11 +820,11 @@ DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); if (SunkAddr->getType() != Addr->getType()) - SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), InsertPt); + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); - const Type *IntPtrTy = + Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(AccessTy->getContext()); Value *Result = 0; @@ -625,10 +837,9 @@ if (AddrMode.BaseReg) { Value *V = AddrMode.BaseReg; if (V->getType()->isPointerTy()) - V = new PtrToIntInst(V, IntPtrTy, InsertPt); + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); if (V->getType() != IntPtrTy) - V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true, - InsertPt); + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); Result = V; } @@ -638,29 +849,27 @@ if (V->getType() == IntPtrTy) { // done. 
} else if (V->getType()->isPointerTy()) { - V = new PtrToIntInst(V, IntPtrTy, InsertPt); + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < cast<IntegerType>(V->getType())->getBitWidth()) { - V = new TruncInst(V, IntPtrTy, InsertPt); + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } else { - V = new SExtInst(V, IntPtrTy, InsertPt); + V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr"); } if (AddrMode.Scale != 1) - V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy, - AddrMode.Scale), - InsertPt); + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); if (Result) - Result = BinaryOperator::CreateAdd(Result, V, InsertPt); + Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } // Add in the BaseGV if present. if (AddrMode.BaseGV) { - Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, - InsertPt); + Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); if (Result) - Result = BinaryOperator::CreateAdd(Result, V, InsertPt); + Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } @@ -669,7 +878,7 @@ if (AddrMode.BaseOffs) { Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); if (Result) - Result = BinaryOperator::CreateAdd(Result, V, InsertPt); + Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } @@ -677,63 +886,57 @@ if (Result == 0) SunkAddr = Constant::getNullValue(Addr->getType()); else - SunkAddr = new IntToPtrInst(Result, Addr->getType(), InsertPt); + SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); } - MemoryInst->replaceUsesOfWith(Addr, SunkAddr); + MemoryInst->replaceUsesOfWith(Repl, SunkAddr); - if (Addr->use_empty()) { - RecursivelyDeleteTriviallyDeadInstructions(Addr); - // This address is now available for reassignment, so erase the table entry; - // we don't want to match some completely different instruction. 
- SunkAddrs[Addr] = 0; + // If we have no uses, recursively delete the value and all dead instructions + // using it. + if (Repl->use_empty()) { + // This can cause recursive deletion, which can invalidate our iterator. + // Use a WeakVH to hold onto it in case this happens. + WeakVH IterHandle(CurInstIterator); + BasicBlock *BB = CurInstIterator->getParent(); + + RecursivelyDeleteTriviallyDeadInstructions(Repl); + + if (IterHandle != CurInstIterator) { + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } else { + // This address is now available for reassignment, so erase the table + // entry; we don't want to match some completely different instruction. + SunkAddrs[Addr] = 0; + } } + ++NumMemoryInsts; return true; } /// OptimizeInlineAsmInst - If there are any memory operands, use /// OptimizeMemoryInst to sink their address computing into the block when /// possible / profitable. -bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS, - DenseMap<Value*,Value*> &SunkAddrs) { +bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; - InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); - // Do a prepass over the constraints, canonicalizing them, and building up the - // ConstraintOperands list. - std::vector<InlineAsm::ConstraintInfo> - ConstraintInfos = IA->ParseConstraints(); - - /// ConstraintOperands - Information about all of the constraints. - std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands; - unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { - ConstraintOperands. - push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i])); - TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back(); - - // Compute the value type for each operand. 
- switch (OpInfo.Type) { - case InlineAsm::isOutput: - if (OpInfo.isIndirect) - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); - break; - case InlineAsm::isInput: - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); - break; - case InlineAsm::isClobber: - // Nothing to do. - break; - } - + TargetLowering::AsmOperandInfoVector + TargetConstraints = TLI->ParseConstraints(CS); + unsigned ArgNo = 0; + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + // Compute the constraint code and ConstraintType to use. TLI->ComputeConstraintToUse(OpInfo, SDValue()); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.isIndirect) { - Value *OpVal = OpInfo.CallOperandVal; - MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs); - } + Value *OpVal = CS->getArgOperand(ArgNo++); + MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); + } else if (OpInfo.Type == InlineAsm::isInput) + ArgNo++; } return MadeChange; @@ -755,7 +958,9 @@ // If the load has other users and the truncate is not free, this probably // isn't worthwhile. if (!LI->hasOneUse() && - TLI && !TLI->isTruncateFree(I->getType(), LI->getType())) + TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) || + !TLI->isTypeLegal(TLI->getValueType(I->getType()))) && + !TLI->isTruncateFree(I->getType(), LI->getType())) return false; // Check whether the target supports casts folded into loads. @@ -773,13 +978,14 @@ // can fold it. I->removeFromParent(); I->insertAfter(LI); + ++NumExtsMoved; return true; } bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { BasicBlock *DefBB = I->getParent(); - // If both result of the {s|z}xt and its source are live out, rewrite all + // If the result of a {s|z}ext and its source are both live out, rewrite all // other uses of the source with result of extension. 
Value *Src = I->getOperand(0); if (Src->hasOneUse()) @@ -837,89 +1043,133 @@ Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; if (!InsertedTrunc) { - BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); - - InsertedTrunc = new TruncInst(I, Src->getType(), InsertPt); + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); } // Replace a use of the {s|z}ext source with a use of the result. TheUse = InsertedTrunc; - + ++NumExtUses; MadeChange = true; } return MadeChange; } +bool CodeGenPrepare::OptimizeInst(Instruction *I) { + if (PHINode *P = dyn_cast<PHINode>(I)) { + // It is possible for very late stage optimizations (such as SimplifyCFG) + // to introduce PHI nodes too late to be cleaned up. If we detect such a + // trivial PHI, go ahead and zap it here. + if (Value *V = SimplifyInstruction(P)) { + P->replaceAllUsesWith(V); + P->eraseFromParent(); + ++NumPHIsElim; + return true; + } + return false; + } + + if (CastInst *CI = dyn_cast<CastInst>(I)) { + // If the source of the cast is a constant, then this should have + // already been constant folded. The only reason NOT to constant fold + // it is if something (e.g. LSR) was careful to place the constant + // evaluation in a block other than then one that uses it (e.g. to hoist + // the address of globals out of a loop). If this is the case, we don't + // want to forward-subst the cast. 
+ if (isa<Constant>(CI->getOperand(0))) + return false; + + if (TLI && OptimizeNoopCopyExpression(CI, *TLI)) + return true; + + if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { + bool MadeChange = MoveExtToFormExtLoad(I); + return MadeChange | OptimizeExtUses(I); + } + return false; + } + + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + return OptimizeCmpExpression(CI); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (TLI) + return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); + return false; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (TLI) + return OptimizeMemoryInst(I, SI->getOperand(1), + SI->getOperand(0)->getType()); + return false; + } + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { + if (GEPI->hasAllZeroIndices()) { + /// The GEP operand must be a pointer, so must its result -> BitCast + Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NC); + GEPI->eraseFromParent(); + ++NumGEPsElim; + OptimizeInst(NC); + return true; + } + return false; + } + + if (CallInst *CI = dyn_cast<CallInst>(I)) + return OptimizeCallInst(CI); + + if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) + return DupRetToEnableTailCallOpts(RI); + + return false; +} + // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { + SunkAddrs.clear(); bool MadeChange = false; - // Split all critical edges where the dest block has a PHI. 
- TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); - } - } + CurInstIterator = BB.begin(); + for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; ) + MadeChange |= OptimizeInst(CurInstIterator++); - // Keep track of non-local addresses that have been sunk into this block. - // This allows us to avoid inserting duplicate code for blocks with multiple - // load/stores of the same address. - DenseMap<Value*, Value*> SunkAddrs; + return MadeChange; +} - for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) { - Instruction *I = BBI++; - - if (CastInst *CI = dyn_cast<CastInst>(I)) { - // If the source of the cast is a constant, then this should have - // already been constant folded. The only reason NOT to constant fold - // it is if something (e.g. LSR) was careful to place the constant - // evaluation in a block other than then one that uses it (e.g. to hoist - // the address of globals out of a loop). If this is the case, we don't - // want to forward-subst the cast. - if (isa<Constant>(CI->getOperand(0))) +// llvm.dbg.value is far away from the value then iSel may not be able +// handle it properly. iSel will drop llvm.dbg.value if it can not +// find a node corresponding to the value. 
+bool CodeGenPrepare::PlaceDbgValues(Function &F) { + bool MadeChange = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + Instruction *PrevNonDbgInst = NULL; + for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { + Instruction *Insn = BI; ++BI; + DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); + if (!DVI) { + PrevNonDbgInst = Insn; continue; - - bool Change = false; - if (TLI) { - Change = OptimizeNoopCopyExpression(CI, *TLI); - MadeChange |= Change; } - if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) { - MadeChange |= MoveExtToFormExtLoad(I); - MadeChange |= OptimizeExtUses(I); - } - } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) { - MadeChange |= OptimizeCmpExpression(CI); - } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (TLI) - MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), - SunkAddrs); - } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (TLI) - MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1), - SI->getOperand(0)->getType(), - SunkAddrs); - } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { - if (GEPI->hasAllZeroIndices()) { - /// The GEP operand must be a pointer, so must its result -> BitCast - Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), - GEPI); - GEPI->replaceAllUsesWith(NC); - GEPI->eraseFromParent(); + Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); + if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { + DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); + DVI->removeFromParent(); + if (isa<PHINode>(VI)) + DVI->insertBefore(VI->getParent()->getFirstInsertionPt()); + else + DVI->insertAfter(VI); MadeChange = true; - BBI = NC; + ++NumDbgValueMoved; } - } else if (CallInst *CI = dyn_cast<CallInst>(I)) { - MadeChange |= OptimizeCallInst(CI); } } - return MadeChange; }
diff --git a/src/LLVM/lib/Transforms/Scalar/ConstantProp.cpp b/src/LLVM/lib/Transforms/Scalar/ConstantProp.cpp new file mode 100644 index 0000000..664c3f6 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/ConstantProp.cpp
@@ -0,0 +1,101 @@
+//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements constant propagation and merging:
+//
+// Specifically, this:
+//   * Converts instructions like "add int 1, 2" into 3
+//
+// Notice that:
+//   * This pass has a habit of making definitions be dead.  It is a good idea
+//     to run a DIE pass sometime after running this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "constprop"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constant.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInstKilled, "Number of instructions killed");
+
+namespace {
+  /// ConstantPropagation - Function pass that removes instructions which
+  /// ConstantFoldInstruction can reduce to a Constant.  It preserves the CFG.
+  struct ConstantPropagation : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    ConstantPropagation() : FunctionPass(ID) {
+      initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+    }
+  };
+}
+
+char ConstantPropagation::ID = 0;
+INITIALIZE_PASS(ConstantPropagation, "constprop",
+                "Simple constant propagation", false, false)
+
+/// createConstantPropagationPass - Allocate a new ConstantPropagation pass.
+FunctionPass *llvm::createConstantPropagationPass() {
+  return new ConstantPropagation();
+}
+
+/// runOnFunction - Fold every used instruction of F that
+/// ConstantFoldInstruction can reduce to a Constant: its uses are redirected
+/// to the constant and it is erased.  Returns true if anything was erased.
+bool ConstantPropagation::runOnFunction(Function &F) {
+  // Seed the pending set with every instruction in the function.
+  std::set<Instruction*> Pending;
+  for (inst_iterator It = inst_begin(F), End = inst_end(F); It != End; ++It)
+    Pending.insert(&*It);
+
+  bool Changed = false;
+  while (!Pending.empty()) {
+    // Take one element off the pending set.
+    std::set<Instruction*>::iterator Front = Pending.begin();
+    Instruction *Inst = *Front;
+    Pending.erase(Front);
+
+    // Instructions without any use are left alone.
+    if (Inst->use_empty())
+      continue;
+
+    Constant *Folded = ConstantFoldInstruction(Inst);
+    if (!Folded)
+      continue;
+
+    // Users of Inst may become foldable once they see the constant, so queue
+    // them before the uses are rewritten.
+    for (Value::use_iterator UI = Inst->use_begin(), UE = Inst->use_end();
+         UI != UE; ++UI)
+      Pending.insert(cast<Instruction>(*UI));
+
+    // Point every use of Inst at the folded constant.
+    Inst->replaceAllUsesWith(Folded);
+
+    // Inst is now unused: drop it from the pending set and from the function.
+    Pending.erase(Inst);
+    Inst->eraseFromParent();
+
+    Changed = true;
+    ++NumInstKilled;
+  }
+  return Changed;
+}
diff --git a/src/LLVM/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/src/LLVM/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp new file mode 100644 index 0000000..e275268 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -0,0 +1,227 @@
+//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Correlated Value Propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "correlated-value-propagation"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPhis,      "Number of phis propagated");
+STATISTIC(NumSelects,   "Number of selects propagated");
+STATISTIC(NumMemAccess, "Number of memory access targets propagated");
+STATISTIC(NumCmps,      "Number of comparisons propagated");
+
+namespace {
+  /// CorrelatedValuePropagation - Function pass that asks LazyValueInfo for
+  /// constants and uses them to rewrite selects, phi inputs, load/store
+  /// pointer operands and comparisons.
+  class CorrelatedValuePropagation : public FunctionPass {
+    LazyValueInfo *LVI;  // Assigned at the start of every runOnFunction call.
+
+    bool processSelect(SelectInst *SI);
+    bool processPHI(PHINode *P);
+    bool processMemAccess(Instruction *I);
+    bool processCmp(CmpInst *C);
+
+  public:
+    static char ID;
+    CorrelatedValuePropagation(): FunctionPass(ID) {
+     initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LazyValueInfo>();
+    }
+  };
+}
+
+char CorrelatedValuePropagation::ID = 0;
+INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
+                "Value Propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
+                "Value Propagation", false, false)
+
+// Public interface to the Value Propagation pass
+Pass *llvm::createCorrelatedValuePropagationPass() {
+  return new CorrelatedValuePropagation();
+}
+
+/// processSelect - If LVI reports the condition of S as a ConstantInt in S's
+/// block, replace S with the operand that constant selects and erase S.
+/// Vector-typed selects and selects whose condition is already a Constant are
+/// skipped.  Returns true if S was erased.
+bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
+  if (S->getType()->isVectorTy()) return false;
+  if (isa<Constant>(S->getOperand(0))) return false;
+
+  Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
+  if (!C) return false;
+
+  ConstantInt *CI = dyn_cast<ConstantInt>(C);
+  if (!CI) return false;
+
+  Value *ReplaceWith = S->getOperand(1);
+  Value *Other = S->getOperand(2);
+  if (!CI->isOne()) std::swap(ReplaceWith, Other);
+  // A select cannot be replaced by itself; fall back to undef in that case.
+  if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType());
+
+  S->replaceAllUsesWith(ReplaceWith);
+  S->eraseFromParent();
+
+  ++NumSelects;
+
+  return true;
+}
+
+/// processPHI - Replace each non-constant incoming value of P with the
+/// constant LVI reports for it on the corresponding edge, then erase P if
+/// SimplifyInstruction can reduce it to an existing value.  Returns true if
+/// P was modified or erased.
+bool CorrelatedValuePropagation::processPHI(PHINode *P) {
+  bool Changed = false;
+
+  BasicBlock *BB = P->getParent();
+  for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+    Value *Incoming = P->getIncomingValue(i);
+    if (isa<Constant>(Incoming)) continue;
+
+    Constant *C = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB);
+    if (!C) continue;
+
+    P->setIncomingValue(i, C);
+    Changed = true;
+  }
+
+  if (Value *V = SimplifyInstruction(P)) {
+    P->replaceAllUsesWith(V);
+    P->eraseFromParent();
+    Changed = true;
+  }
+
+  // Count only the phis that were actually rewritten or removed; counting
+  // every phi visited would make the "phis propagated" statistic meaningless.
+  if (Changed)
+    ++NumPhis;
+
+  return Changed;
+}
+
+/// processMemAccess - If LVI reports the pointer operand of I as a constant
+/// in I's block, rewrite I to use that constant.  Anything that is not a
+/// LoadInst is cast to StoreInst.  Returns true if I was rewritten.
+bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
+  Value *Pointer = 0;
+  if (LoadInst *L = dyn_cast<LoadInst>(I))
+    Pointer = L->getPointerOperand();
+  else
+    Pointer = cast<StoreInst>(I)->getPointerOperand();
+
+  if (isa<Constant>(Pointer)) return false;
+
+  Constant *C = LVI->getConstant(Pointer, I->getParent());
+  if (!C) return false;
+
+  ++NumMemAccess;
+  I->replaceUsesOfWith(Pointer, C);
+  return true;
+}
+
+/// processCmp - If the value of this comparison could be determined locally,
+/// constant propagation would already have figured it out.  Instead, walk
+/// the predecessors and statically evaluate the comparison based on information
+/// available on that edge.  If a given static evaluation is true on ALL
+/// incoming edges, then it's true universally and we can simplify the compare.
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
+  Value *Op0 = C->getOperand(0);
+  if (isa<Instruction>(Op0) &&
+      cast<Instruction>(Op0)->getParent() == C->getParent())
+    return false;
+
+  Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+  if (!Op1) return false;
+
+  pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
+  if (PI == PE) return false;
+
+  LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+                                    Op0, Op1, *PI, C->getParent());
+  if (Result == LazyValueInfo::Unknown) return false;
+
+  ++PI;
+  while (PI != PE) {
+    LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+                                    Op0, Op1, *PI, C->getParent());
+    if (Res != Result) return false;
+    ++PI;
+  }
+
+  ++NumCmps;
+
+  if (Result == LazyValueInfo::True)
+    C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
+  else
+    C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
+
+  C->eraseFromParent();
+
+  return true;
+}
+
+/// runOnFunction - Fetch LazyValueInfo and dispatch every select, phi,
+/// integer/floating-point compare, load and store in F to the matching
+/// process* handler.  Returns true if any handler reported a change.
+bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+  LVI = &getAnalysis<LazyValueInfo>();
+
+  bool FnChanged = false;
+
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    bool BBChanged = false;
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+      // Advance before dispatching: the handlers may erase the instruction.
+      Instruction *II = BI++;
+      switch (II->getOpcode()) {
+      case Instruction::Select:
+        BBChanged |= processSelect(cast<SelectInst>(II));
+        break;
+      case Instruction::PHI:
+        BBChanged |= processPHI(cast<PHINode>(II));
+        break;
+      case Instruction::ICmp:
+      case Instruction::FCmp:
+        BBChanged |= processCmp(cast<CmpInst>(II));
+        break;
+      case Instruction::Load:
+      case Instruction::Store:
+        BBChanged |= processMemAccess(II);
+        break;
+      }
+    }
+
+    FnChanged |= BBChanged;
+  }
+
+  return FnChanged;
+}
diff --git a/src/LLVM/lib/Transforms/Scalar/DCE.cpp b/src/LLVM/lib/Transforms/Scalar/DCE.cpp new file mode 100644 index 0000000..8dbcc23 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/DCE.cpp
@@ -0,0 +1,136 @@
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead inst elimination and dead code elimination.
+//
+// Dead Inst Elimination performs a single pass over the function removing
+// instructions that are obviously dead.  Dead Code Elimination is similar, but
+// it rechecks instructions that were used by removed instructions to see if
+// they are newly dead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Instruction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#include <vector>
+using namespace llvm;
+
+STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
+STATISTIC(DCEEliminated, "Number of insts removed");
+
+namespace {
+  //===--------------------------------------------------------------------===//
+  // DeadInstElimination pass implementation
+  //
+  struct DeadInstElimination : public BasicBlockPass {
+    static char ID; // Pass identification, replacement for typeid
+    DeadInstElimination() : BasicBlockPass(ID) {
+      initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+    }
+    virtual bool runOnBasicBlock(BasicBlock &BB) {
+      bool Changed = false;
+      for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
+        // Step past the instruction first; it may be erased below.
+        Instruction *Inst = DI++;
+        if (isInstructionTriviallyDead(Inst)) {
+          Inst->eraseFromParent();
+          Changed = true;
+          ++DIEEliminated;
+        }
+      }
+      return Changed;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+    }
+  };
+}
+
+char DeadInstElimination::ID = 0;
+INITIALIZE_PASS(DeadInstElimination, "die",
+                "Dead Instruction Elimination", false, false)
+
+Pass *llvm::createDeadInstEliminationPass() {
+  return new DeadInstElimination();
+}
+
+
+namespace {
+  //===--------------------------------------------------------------------===//
+  // DeadCodeElimination pass implementation
+  //
+  struct DCE : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    DCE() : FunctionPass(ID) {
+      initializeDCEPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+    }
+  };
+}
+
+char DCE::ID = 0;
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
+
+/// runOnFunction - Erase every instruction of F that
+/// isInstructionTriviallyDead accepts, re-examining the operands of each
+/// erased instruction.  Returns true if at least one instruction was erased.
+bool DCE::runOnFunction(Function &F) {
+  // Start out with all of the instructions in the worklist...
+  std::vector<Instruction*> WorkList;
+  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
+    WorkList.push_back(&*i);
+
+  // Loop over the worklist finding instructions that are dead.  If they are
+  // dead make them drop all of their uses, making other instructions
+  // potentially dead, and work until the worklist is empty.
+  bool MadeChange = false;
+  while (!WorkList.empty()) {
+    Instruction *I = WorkList.back();
+    WorkList.pop_back();
+
+    if (!isInstructionTriviallyDead(I))
+      continue;
+
+    // Queue every instruction operand of I: it might go dead once I, one of
+    // its users, is removed.
+    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+      if (Instruction *Used = dyn_cast<Instruction>(*OI))
+        WorkList.push_back(Used);
+
+    // Purge any remaining copies of I from the worklist in one pass, and do
+    // it while I is still alive so no pointer to a destroyed instruction is
+    // ever compared or revisited.
+    WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
+                   WorkList.end());
+
+    // Remove the instruction.
+    I->eraseFromParent();
+
+    MadeChange = true;
+    ++DCEEliminated;
+  }
+  return MadeChange;
+}
+
+FunctionPass *llvm::createDeadCodeEliminationPass() {
+  return new DCE();
+}
+
diff --git a/src/LLVM/lib/Transforms/Scalar/DeadStoreElimination.cpp b/src/LLVM/lib/Transforms/Scalar/DeadStoreElimination.cpp index 84e6a40..a593d0f 100644 --- a/src/LLVM/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/src/LLVM/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -19,16 +19,20 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumFastStores, "Number of stores deleted"); @@ -36,58 +40,107 @@ namespace { struct DSE : public FunctionPass { - TargetData *TD; + AliasAnalysis *AA; + MemoryDependenceAnalysis *MD; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(ID) {} + DSE() : FunctionPass(ID), AA(0), MD(0) { + initializeDSEPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F) { - bool Changed = false; - + AA = &getAnalysis<AliasAnalysis>(); + MD = &getAnalysis<MemoryDependenceAnalysis>(); DominatorTree &DT = getAnalysis<DominatorTree>(); - + + bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) // Only check non-dead blocks. Dead blocks may have strange pointer // cycles that will confuse alias analysis. 
if (DT.isReachableFromEntry(I)) Changed |= runOnBasicBlock(*I); + + AA = 0; MD = 0; return Changed; } - - bool runOnBasicBlock(BasicBlock &BB); - bool handleFreeWithNonTrivialDependency(const CallInst *F, - MemDepResult Dep); - bool handleEndBlock(BasicBlock &BB); - bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize, - BasicBlock::iterator &BBI, - SmallPtrSet<Value*, 64> &deadPointers); - void DeleteDeadInstruction(Instruction *I, - SmallPtrSet<Value*, 64> *deadPointers = 0); - - // getAnalysisUsage - We require post dominance frontiers (aka Control - // Dependence Graph) + bool runOnBasicBlock(BasicBlock &BB); + bool HandleFree(CallInst *F); + bool handleEndBlock(BasicBlock &BB); + void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, + SmallPtrSet<Value*, 16> &DeadStackObjects); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<DominatorTree>(); AU.addRequired<AliasAnalysis>(); AU.addRequired<MemoryDependenceAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addPreserved<DominatorTree>(); AU.addPreserved<MemoryDependenceAnalysis>(); } - - unsigned getPointerSize(Value *V) const; }; } char DSE::ID = 0; -INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false); +INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false) FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } -/// doesClobberMemory - Does this instruction clobber (write without reading) -/// some memory? 
-static bool doesClobberMemory(Instruction *I) { +//===----------------------------------------------------------------------===// +// Helper functions +//===----------------------------------------------------------------------===// + +/// DeleteDeadInstruction - Delete this instruction. Before we do, go through +/// and zero out all the operands of this instruction. If any of them become +/// dead, delete them and the computation tree that feeds them. +/// +/// If ValueSet is non-null, remove any deleted instructions from it as well. +/// +static void DeleteDeadInstruction(Instruction *I, + MemoryDependenceAnalysis &MD, + SmallPtrSet<Value*, 16> *ValueSet = 0) { + SmallVector<Instruction*, 32> NowDeadInsts; + + NowDeadInsts.push_back(I); + --NumFastOther; + + // Before we touch this instruction, remove it from memdep! + do { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); + ++NumFastOther; + + // This instruction is dead, zap it, in stages. Start by removing it from + // MemDep, which needs to know the operands and needs it to be in the + // function. + MD.removeInstruction(DeadInst); + + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { + Value *Op = DeadInst->getOperand(op); + DeadInst->setOperand(op, 0); + + // If this operand just became dead, add it to the NowDeadInsts list. + if (!Op->use_empty()) continue; + + if (Instruction *OpI = dyn_cast<Instruction>(Op)) + if (isInstructionTriviallyDead(OpI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + + if (ValueSet) ValueSet->erase(DeadInst); + } while (!NowDeadInsts.empty()); +} + + +/// hasMemoryWrite - Does this instruction write some memory? This only returns +/// true for things that we can analyze with other helpers below. 
+static bool hasMemoryWrite(Instruction *I) { if (isa<StoreInst>(I)) return true; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { @@ -105,141 +158,307 @@ return false; } -/// isElidable - If the value of this instruction and the memory it writes to is -/// unused, may we delete this instrtction? -static bool isElidable(Instruction *I) { - assert(doesClobberMemory(I)); - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) - return II->getIntrinsicID() != Intrinsic::lifetime_end; - if (StoreInst *SI = dyn_cast<StoreInst>(I)) - return !SI->isVolatile(); - return true; +/// getLocForWrite - Return a Location stored to by the specified instruction. +/// If isRemovable returns true, this function and getLocForRead completely +/// describe the memory operations for this instruction. +static AliasAnalysis::Location +getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + return AA.getLocation(SI); + + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { + // memcpy/memmove/memset. + AliasAnalysis::Location Loc = AA.getLocationForDest(MI); + // If we don't have target data around, an unknown size in Location means + // that we should use the size of the pointee type. This isn't valid for + // memset/memcpy, which writes more than an i8. + if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0) + return AliasAnalysis::Location(); + return Loc; + } + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); + if (II == 0) return AliasAnalysis::Location(); + + switch (II->getIntrinsicID()) { + default: return AliasAnalysis::Location(); // Unhandled intrinsic. + case Intrinsic::init_trampoline: + // If we don't have target data around, an unknown size in Location means + // that we should use the size of the pointee type. This isn't valid for + // init.trampoline, which writes more than an i8. 
+ if (AA.getTargetData() == 0) return AliasAnalysis::Location(); + + // FIXME: We don't know the size of the trampoline, so we can't really + // handle it here. + return AliasAnalysis::Location(II->getArgOperand(0)); + case Intrinsic::lifetime_end: { + uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); + return AliasAnalysis::Location(II->getArgOperand(1), Len); + } + } } -/// getPointerOperand - Return the pointer that is being clobbered. -static Value *getPointerOperand(Instruction *I) { - assert(doesClobberMemory(I)); +/// getLocForRead - Return the location read by the specified "hasMemoryWrite" +/// instruction if any. +static AliasAnalysis::Location +getLocForRead(Instruction *Inst, AliasAnalysis &AA) { + assert(hasMemoryWrite(Inst) && "Unknown instruction case"); + + // The only instructions that both read and write are the mem transfer + // instructions (memcpy/memmove). + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) + return AA.getLocationForSource(MTI); + return AliasAnalysis::Location(); +} + + +/// isRemovable - If the value of this instruction and the memory it writes to +/// is unused, may we delete this instruction? +static bool isRemovable(Instruction *I) { + // Don't remove volatile/atomic stores. + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->isUnordered(); + + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { + default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate"); + case Intrinsic::lifetime_end: + // Never remove dead lifetime_end's, e.g. because it is followed by a + // free. + return false; + case Intrinsic::init_trampoline: + // Always safe to remove init_trampoline. + return true; + + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + // Don't remove volatile memory intrinsics. + return !cast<MemIntrinsic>(II)->isVolatile(); + } +} + +/// getStoredPointerOperand - Return the pointer that is being written to. 
+static Value *getStoredPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) - return MI->getArgOperand(0); + return MI->getDest(); IntrinsicInst *II = cast<IntrinsicInst>(I); switch (II->getIntrinsicID()) { default: assert(false && "Unexpected intrinsic!"); case Intrinsic::init_trampoline: return II->getArgOperand(0); - case Intrinsic::lifetime_end: - return II->getArgOperand(1); } } -/// getStoreSize - Return the length in bytes of the write by the clobbering -/// instruction. If variable or unknown, returns -1. -static unsigned getStoreSize(Instruction *I, const TargetData *TD) { - assert(doesClobberMemory(I)); - if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (!TD) return -1u; - return TD->getTypeStoreSize(SI->getOperand(0)->getType()); +static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) { + const TargetData *TD = AA.getTargetData(); + if (TD == 0) + return AliasAnalysis::UnknownSize; + + if (AllocaInst *A = dyn_cast<AllocaInst>(V)) { + // Get size information for the alloca + if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) + return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); + return AliasAnalysis::UnknownSize; } - Value *Len; - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { - Len = MI->getLength(); - } else { - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: assert(false && "Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return -1u; - case Intrinsic::lifetime_end: - Len = II->getArgOperand(0); - break; + assert(isa<Argument>(V) && "Expected AllocaInst or Argument!"); + PointerType *PT = cast<PointerType>(V->getType()); + return TD->getTypeAllocSize(PT->getElementType()); +} + +/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is +/// pointing to an object with a pointer size we can trust. 
+static bool isObjectPointerWithTrustworthySize(const Value *V) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) + return !AI->isArrayAllocation(); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return !GV->mayBeOverridden(); + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + return false; +} + +/// isCompleteOverwrite - Return true if a store to the 'Later' location +/// completely overwrites a store to the 'Earlier' location. +static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, + const AliasAnalysis::Location &Earlier, + AliasAnalysis &AA) { + const Value *P1 = Earlier.Ptr->stripPointerCasts(); + const Value *P2 = Later.Ptr->stripPointerCasts(); + + // If the start pointers are the same, we just have to compare sizes to see if + // the later store was larger than the earlier store. + if (P1 == P2) { + // If we don't know the sizes of either access, then we can't do a + // comparison. + if (Later.Size == AliasAnalysis::UnknownSize || + Earlier.Size == AliasAnalysis::UnknownSize) { + // If we have no TargetData information around, then the size of the store + // is inferrable from the pointee type. If they are the same type, then + // we know that the store is safe. + if (AA.getTargetData() == 0) + return Later.Ptr->getType() == Earlier.Ptr->getType(); + return false; } + + // Make sure that the Later size is >= the Earlier size. + if (Later.Size < Earlier.Size) + return false; + return true; } - if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len)) - if (!LenCI->isAllOnesValue()) - return LenCI->getZExtValue(); - return -1u; + + // Otherwise, we have to have size information, and the later store has to be + // larger than the earlier one. 
+ if (Later.Size == AliasAnalysis::UnknownSize || + Earlier.Size == AliasAnalysis::UnknownSize || + Later.Size <= Earlier.Size || AA.getTargetData() == 0) + return false; + + // Check to see if the later store is to the entire object (either a global, + // an alloca, or a byval argument). If so, then it clearly overwrites any + // other store to the same object. + const TargetData &TD = *AA.getTargetData(); + + const Value *UO1 = GetUnderlyingObject(P1, &TD), + *UO2 = GetUnderlyingObject(P2, &TD); + + // If we can't resolve the same pointers to the same object, then we can't + // analyze them at all. + if (UO1 != UO2) + return false; + + // If the "Later" store is to a recognizable object, get its size. + if (isObjectPointerWithTrustworthySize(UO2)) { + uint64_t ObjectSize = + TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType()); + if (ObjectSize == Later.Size) + return true; + } + + // Okay, we have stores to two completely different pointers. Try to + // decompose the pointer into a "base + constant_offset" form. If the base + // pointers are equal, then we can reason about the two stores. + int64_t EarlierOff = 0, LaterOff = 0; + const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); + const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); + + // If the base pointers still differ, we have two completely different stores. + if (BP1 != BP2) + return false; + + // The later store completely overlaps the earlier store if: + // + // 1. Both start at the same offset and the later one's size is greater than + // or equal to the earlier one's, or + // + // |--earlier--| + // |-- later --| + // + // 2. The earlier store has an offset greater than the later offset, but which + // still lies completely within the later store. + // + // |--earlier--| + // |----- later ------| + // + // We have to be careful here as *Off is signed while *.Size is unsigned. 
+ if (EarlierOff >= LaterOff && + uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) + return true; + + // Otherwise, they don't completely overlap. + return false; } -/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is -/// greater than or equal to the store in I2. This returns false if we don't -/// know. +/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a +/// memory region into an identical pointer) then it doesn't actually make its +/// input dead in the traditional sense. Consider this case: /// -static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2, - const TargetData *TD) { - const Type *I1Ty = getPointerOperand(I1)->getType(); - const Type *I2Ty = getPointerOperand(I2)->getType(); - - // Exactly the same type, must have exactly the same size. - if (I1Ty == I2Ty) return true; - - int I1Size = getStoreSize(I1, TD); - int I2Size = getStoreSize(I2, TD); - - return I1Size != -1 && I2Size != -1 && I1Size >= I2Size; +/// memcpy(A <- B) +/// memcpy(A <- A) +/// +/// In this case, the second store to A does not make the first store to A dead. +/// The usual situation isn't an explicit A<-A store like this (which can be +/// trivially removed) but a case where two pointers may alias. +/// +/// This function detects when it is unsafe to remove a dependent instruction +/// because the DSE inducing instruction may be a self-read. +static bool isPossibleSelfRead(Instruction *Inst, + const AliasAnalysis::Location &InstStoreLoc, + Instruction *DepWrite, AliasAnalysis &AA) { + // Self reads can only happen for instructions that read memory. Get the + // location read. + AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA); + if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction. + + // If the read and written loc obviously don't alias, it isn't a read. + if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; + + // Okay, 'Inst' may copy over itself. 
However, we can still remove a the + // DepWrite instruction if we can prove that it reads from the same location + // as Inst. This handles useful cases like: + // memcpy(A <- B) + // memcpy(A <- B) + // Here we don't know if A/B may alias, but we do know that B/B are must + // aliases, so removing the first memcpy is safe (assuming it writes <= # + // bytes as the second one. + AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA); + + if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) + return false; + + // If DepWrite doesn't read memory or if we can't prove it is a must alias, + // then it can't be considered dead. + return true; } + +//===----------------------------------------------------------------------===// +// DSE Pass +//===----------------------------------------------------------------------===// + bool DSE::runOnBasicBlock(BasicBlock &BB) { - MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); - TD = getAnalysisIfAvailable<TargetData>(); - bool MadeChange = false; - + // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { Instruction *Inst = BBI++; - - // If we find a store or a free, get its memory dependence. - if (!doesClobberMemory(Inst)) - continue; - - MemDepResult InstDep = MD.getDependency(Inst); - - // Ignore non-local stores. - // FIXME: cross-block DSE would be fun. :) - if (InstDep.isNonLocal()) continue; - - // If not a definite must-alias dependency, ignore it. - if (!InstDep.isDef()) - continue; - - // If this is a store-store dependence, then the previous store is dead so - // long as this store is at least as big as it. - if (doesClobberMemory(InstDep.getInst())) { - Instruction *DepStore = InstDep.getInst(); - if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) && - isElidable(DepStore)) { - // Delete the store and now-dead instructions that feed it. 
- DeleteDeadInstruction(DepStore); - ++NumFastStores; - MadeChange = true; - // DeleteDeadInstruction can delete the current instruction in loop - // cases, reset BBI. - BBI = Inst; - if (BBI != BB.begin()) - --BBI; - continue; - } - } - - if (!isElidable(Inst)) + // Handle 'free' calls specially. + if (CallInst *F = isFreeCall(Inst)) { + MadeChange |= HandleFree(F); continue; - + } + + // If we find something that writes memory, get its memory dependence. + if (!hasMemoryWrite(Inst)) + continue; + + MemDepResult InstDep = MD->getDependency(Inst); + + // Ignore any store where we can't find a local dependence. + // FIXME: cross-block DSE would be fun. :) + if (!InstDep.isDef() && !InstDep.isClobber()) + continue; + // If we're storing the same value back to a pointer that we just // loaded from, then the store can be removed. if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) { if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - SI->getOperand(0) == DepLoad) { + SI->getOperand(0) == DepLoad && isRemovable(SI)) { + DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " + << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); + // DeleteDeadInstruction can delete the current instruction. Save BBI // in case we need it. WeakVH NextInst(BBI); - - DeleteDeadInstruction(SI); - + + DeleteDeadInstruction(SI, *MD); + if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. @@ -250,57 +469,108 @@ } } } - - // If this is a lifetime end marker, we can throw away the store. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) { - if (II->getIntrinsicID() == Intrinsic::lifetime_end) { + + // Figure out what location is being stored to. + AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA); + + // If we didn't get a useful location, fail. 
+ if (Loc.Ptr == 0) + continue; + + while (InstDep.isDef() || InstDep.isClobber()) { + // Get the memory clobbered by the instruction we depend on. MemDep will + // skip any instructions that 'Loc' clearly doesn't interact with. If we + // end up depending on a may- or must-aliased load, then we can't optimize + // away the store and we bail out. However, if we depend on on something + // that overwrites the memory location we *can* potentially optimize it. + // + // Find out what memory location the dependent instruction stores. + Instruction *DepWrite = InstDep.getInst(); + AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); + // If we didn't get a useful location, or if it isn't a size, bail out. + if (DepLoc.Ptr == 0) + break; + + // If we find a write that is a) removable (i.e., non-volatile), b) is + // completely obliterated by the store to 'Loc', and c) which we know that + // 'Inst' doesn't load from, then we can remove it. + if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) && + !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " + << *DepWrite << "\n KILLER: " << *Inst << '\n'); + // Delete the store and now-dead instructions that feed it. - // DeleteDeadInstruction can delete the current instruction. Save BBI - // in case we need it. - WeakVH NextInst(BBI); - - DeleteDeadInstruction(Inst); - - if (NextInst == 0) // Next instruction deleted. - BBI = BB.begin(); - else if (BBI != BB.begin()) // Revisit this instruction if possible. - --BBI; + DeleteDeadInstruction(DepWrite, *MD); ++NumFastStores; MadeChange = true; - continue; + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. + BBI = Inst; + if (BBI != BB.begin()) + --BBI; + break; } + + // If this is a may-aliased store that is clobbering the store value, we + // can keep searching past it for another must-aliased pointer that stores + // to the same location. 
For example, in: + // store -> P + // store -> Q + // store -> P + // we can remove the first store to P even though we don't know if P and Q + // alias. + if (DepWrite == &BB.front()) break; + + // Can't look past this instruction if it might read 'Loc'. + if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref) + break; + + InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB); } } - + // If this block ends in a return, unwind, or unreachable, all allocas are // dead at its end, which means stores to them are also dead. if (BB.getTerminator()->getNumSuccessors() == 0) MadeChange |= handleEndBlock(BB); - + return MadeChange; } -/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose -/// dependency is a store to a field of that structure. -bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F, - MemDepResult Dep) { - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - - Instruction *Dependency = Dep.getInst(); - if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency)) - return false; - - Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject(); +/// HandleFree - Handle frees of entire structures whose dependency is a store +/// to a field of that structure. +bool DSE::HandleFree(CallInst *F) { + bool MadeChange = false; - // Check for aliasing. - if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) != - AliasAnalysis::MustAlias) - return false; - - // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency); - ++NumFastStores; - return true; + MemDepResult Dep = MD->getDependency(F); + + while (Dep.isDef() || Dep.isClobber()) { + Instruction *Dependency = Dep.getInst(); + if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) + return MadeChange; + + Value *DepPointer = + GetUnderlyingObject(getStoredPointerOperand(Dependency)); + + // Check for aliasing. 
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) + return MadeChange; + + // DCE instructions only used to calculate that store + DeleteDeadInstruction(Dependency, *MD); + ++NumFastStores; + MadeChange = true; + + // Inst's old Dependency is now deleted. Compute the next dependency, + // which may also be dead, as in + // s[0] = 0; + // s[1] = 0; // This has just been deleted. + // free(s); + Dep = MD->getDependency(F); + }; + + return MadeChange; } /// handleEndBlock - Remove dead stores to stack-allocated locations in the @@ -310,257 +580,156 @@ /// store i32 1, i32* %A /// ret void bool DSE::handleEndBlock(BasicBlock &BB) { - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - bool MadeChange = false; - - // Pointers alloca'd in this function are dead in the end block - SmallPtrSet<Value*, 64> deadPointers; - + + // Keep track of all of the stack objects that are dead at the end of the + // function. + SmallPtrSet<Value*, 16> DeadStackObjects; + // Find all of the alloca'd pointers in the entry block. BasicBlock *Entry = BB.getParent()->begin(); for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) - deadPointers.insert(AI); - + DeadStackObjects.insert(AI); + // Treat byval arguments the same, stores to them are dead at the end of the // function. for (Function::arg_iterator AI = BB.getParent()->arg_begin(), AE = BB.getParent()->arg_end(); AI != AE; ++AI) if (AI->hasByValAttr()) - deadPointers.insert(AI); - + DeadStackObjects.insert(AI); + // Scan the basic block backwards for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ --BBI; - - // If we find a store whose pointer is dead. - if (doesClobberMemory(BBI)) { - if (isElidable(BBI)) { - // See through pointer-to-pointer bitcasts - Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject(); - // Alloca'd pointers or byval arguments (which are functionally like - // alloca's) are valid candidates for removal. 
- if (deadPointers.count(pointerOperand)) { - // DCE instructions only used to calculate that store. - Instruction *Dead = BBI; - ++BBI; - DeleteDeadInstruction(Dead, &deadPointers); - ++NumFastStores; - MadeChange = true; - continue; - } - } - - // Because a memcpy or memmove is also a load, we can't skip it if we - // didn't remove it. - if (!isa<MemTransferInst>(BBI)) + // If we find a store, check to see if it points into a dead stack value. + if (hasMemoryWrite(BBI) && isRemovable(BBI)) { + // See through pointer-to-pointer bitcasts + Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI)); + + // Stores to stack values are valid candidates for removal. + if (DeadStackObjects.count(Pointer)) { + Instruction *Dead = BBI++; + + DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " + << *Dead << "\n Object: " << *Pointer << '\n'); + + // DCE instructions only used to calculate that store. + DeleteDeadInstruction(Dead, *MD, &DeadStackObjects); + ++NumFastStores; + MadeChange = true; continue; + } } - - Value *killPointer = 0; - uint64_t killPointerSize = ~0UL; - - // If we encounter a use of the pointer, it is no longer considered dead - if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { - // However, if this load is unused and not volatile, we can go ahead and - // remove it, and not have to worry about it making our pointer undead! 
- if (L->use_empty() && !L->isVolatile()) { - ++BBI; - DeleteDeadInstruction(L, &deadPointers); - ++NumFastOther; - MadeChange = true; - continue; - } - - killPointer = L->getPointerOperand(); - } else if (isa<MemTransferInst>(BBI) && - isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) { - killPointer = cast<MemTransferInst>(BBI)->getSource(); - killPointerSize = cast<ConstantInt>( - cast<MemTransferInst>(BBI)->getLength())->getZExtValue(); - } else if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) { - deadPointers.erase(A); - - // Dead alloca's can be DCE'd when we reach them - if (A->use_empty()) { - ++BBI; - DeleteDeadInstruction(A, &deadPointers); - ++NumFastOther; - MadeChange = true; - } - - continue; - } else if (CallSite CS = cast<Value>(BBI)) { - // If this call does not access memory, it can't - // be undeadifying any of our pointers. - if (AA.doesNotAccessMemory(CS)) - continue; - - unsigned modRef = 0; - unsigned other = 0; - - // Remove any pointers made undead by the call from the dead set - std::vector<Value*> dead; - for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(), - E = deadPointers.end(); I != E; ++I) { - // HACK: if we detect that our AA is imprecise, it's not - // worth it to scan the rest of the deadPointers set. Just - // assume that the AA will return ModRef for everything, and - // go ahead and bail. 
- if (modRef >= 16 && other == 0) { - deadPointers.clear(); - return MadeChange; - } - - // See if the call site touches it - AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, - getPointerSize(*I)); - - if (A == AliasAnalysis::ModRef) - ++modRef; - else - ++other; - - if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) - dead.push_back(*I); - } - for (std::vector<Value*>::iterator I = dead.begin(), E = dead.end(); - I != E; ++I) - deadPointers.erase(*I); - - continue; - } else if (isInstructionTriviallyDead(BBI)) { - // For any non-memory-affecting non-terminators, DCE them as we reach them - Instruction *Inst = BBI; - ++BBI; - DeleteDeadInstruction(Inst, &deadPointers); + // Remove any dead non-memory-mutating instructions. + if (isInstructionTriviallyDead(BBI)) { + Instruction *Inst = BBI++; + DeleteDeadInstruction(Inst, *MD, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; } - - if (!killPointer) + + if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) { + DeadStackObjects.erase(A); continue; - - killPointer = killPointer->getUnderlyingObject(); - - // Deal with undead pointers - MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI, - deadPointers); - } - - return MadeChange; -} - -/// RemoveUndeadPointers - check for uses of a pointer that make it -/// undead when scanning for dead stores to alloca's. -bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize, - BasicBlock::iterator &BBI, - SmallPtrSet<Value*, 64> &deadPointers) { - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - - // If the kill pointer can be easily reduced to an alloca, - // don't bother doing extraneous AA queries. - if (deadPointers.count(killPointer)) { - deadPointers.erase(killPointer); - return false; - } - - // A global can't be in the dead pointer set. 
- if (isa<GlobalValue>(killPointer)) - return false; - - bool MadeChange = false; - - SmallVector<Value*, 16> undead; - - for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(), - E = deadPointers.end(); I != E; ++I) { - // See if this pointer could alias it - AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I), - killPointer, killPointerSize); - - // If it must-alias and a store, we can delete it - if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) { - StoreInst *S = cast<StoreInst>(BBI); - - // Remove it! - ++BBI; - DeleteDeadInstruction(S, &deadPointers); - ++NumFastStores; - MadeChange = true; - - continue; - - // Otherwise, it is undead - } else if (A != AliasAnalysis::NoAlias) - undead.push_back(*I); - } - - for (SmallVector<Value*, 16>::iterator I = undead.begin(), E = undead.end(); - I != E; ++I) - deadPointers.erase(*I); - - return MadeChange; -} - -/// DeleteDeadInstruction - Delete this instruction. Before we do, go through -/// and zero out all the operands of this instruction. If any of them become -/// dead, delete them and the computation tree that feeds them. -/// -/// If ValueSet is non-null, remove any deleted instructions from it as well. -/// -void DSE::DeleteDeadInstruction(Instruction *I, - SmallPtrSet<Value*, 64> *ValueSet) { - SmallVector<Instruction*, 32> NowDeadInsts; - - NowDeadInsts.push_back(I); - --NumFastOther; - - // Before we touch this instruction, remove it from memdep! - MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>(); - do { - Instruction *DeadInst = NowDeadInsts.pop_back_val(); - - ++NumFastOther; - - // This instruction is dead, zap it, in stages. Start by removing it from - // MemDep, which needs to know the operands and needs it to be in the - // function. 
- MDA.removeInstruction(DeadInst); - - for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { - Value *Op = DeadInst->getOperand(op); - DeadInst->setOperand(op, 0); - - // If this operand just became dead, add it to the NowDeadInsts list. - if (!Op->use_empty()) continue; - - if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) - NowDeadInsts.push_back(OpI); } - - DeadInst->eraseFromParent(); - - if (ValueSet) ValueSet->erase(DeadInst); - } while (!NowDeadInsts.empty()); -} -unsigned DSE::getPointerSize(Value *V) const { - if (TD) { - if (AllocaInst *A = dyn_cast<AllocaInst>(V)) { - // Get size information for the alloca - if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) - return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); + if (CallSite CS = cast<Value>(BBI)) { + // If this call does not access memory, it can't be loading any of our + // pointers. + if (AA->doesNotAccessMemory(CS)) + continue; + + // If the call might load from any of our allocas, then any store above + // the call is live. + SmallVector<Value*, 8> LiveAllocas; + for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), + E = DeadStackObjects.end(); I != E; ++I) { + // See if the call site touches it. + AliasAnalysis::ModRefResult A = + AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); + + if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) + LiveAllocas.push_back(*I); + } + + for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), + E = LiveAllocas.end(); I != E; ++I) + DeadStackObjects.erase(*I); + + // If all of the allocas were clobbered by the call then we're not going + // to find anything else to process. 
+ if (DeadStackObjects.empty()) + return MadeChange; + + continue; + } + + AliasAnalysis::Location LoadedLoc; + + // If we encounter a use of the pointer, it is no longer considered dead + if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { + if (!L->isUnordered()) // Be conservative with atomic/volatile load + break; + LoadedLoc = AA->getLocation(L); + } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { + LoadedLoc = AA->getLocation(V); + } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { + LoadedLoc = AA->getLocationForSource(MTI); + } else if (!BBI->mayReadFromMemory()) { + // Instruction doesn't read memory. Note that stores that weren't removed + // above will hit this case. + continue; } else { - assert(isa<Argument>(V) && "Expected AllocaInst or Argument!"); - const PointerType *PT = cast<PointerType>(V->getType()); - return TD->getTypeAllocSize(PT->getElementType()); + // Unknown inst; assume it clobbers everything. + break; } + + // Remove any allocas from the DeadPointer set that are loaded, as this + // makes any stores above the access live. + RemoveAccessedObjects(LoadedLoc, DeadStackObjects); + + // If all of the allocas were clobbered by the access then we're not going + // to find anything else to process. + if (DeadStackObjects.empty()) + break; } - return ~0U; + + return MadeChange; } + +/// RemoveAccessedObjects - Check to see if the specified location may alias any +/// of the stack objects in the DeadStackObjects set. If so, they become live +/// because the location is being loaded. +void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, + SmallPtrSet<Value*, 16> &DeadStackObjects) { + const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr); + + // A constant can't be in the dead pointer set. + if (isa<Constant>(UnderlyingPointer)) + return; + + // If the kill pointer can be easily reduced to an alloca, don't bother doing + // extraneous AA queries. 
+ if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { + DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer)); + return; + } + + SmallVector<Value*, 16> NowLive; + for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), + E = DeadStackObjects.end(); I != E; ++I) { + // See if the loaded location could alias the stack location. + AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA)); + if (!AA->isNoAlias(StackLoc, LoadedLoc)) + NowLive.push_back(*I); + } + + for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end(); + I != E; ++I) + DeadStackObjects.erase(*I); +} +
diff --git a/src/LLVM/lib/Transforms/Scalar/EarlyCSE.cpp b/src/LLVM/lib/Transforms/Scalar/EarlyCSE.cpp new file mode 100644 index 0000000..c0223d2 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,470 @@ +//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs a simple dominator tree walk that eliminates trivially +// redundant instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "early-cse" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/RecyclingAllocator.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd"); +STATISTIC(NumCSE, "Number of instructions CSE'd"); +STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); +STATISTIC(NumCSECall, "Number of call instructions CSE'd"); +STATISTIC(NumDSE, "Number of trivial dead stores removed"); + +static unsigned getHash(const void *V) { + return DenseMapInfo<const void*>::getHashValue(V); +} + +//===----------------------------------------------------------------------===// +// SimpleValue +//===----------------------------------------------------------------------===// + +namespace { + /// SimpleValue - Instances of this struct represent available values in the + /// scoped hash table. 
+ struct SimpleValue { + Instruction *Inst; + + SimpleValue(Instruction *I) : Inst(I) { + assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); + } + + bool isSentinel() const { + return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || + Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); + } + + static bool canHandle(Instruction *Inst) { + // This can only handle non-void readnone functions. + if (CallInst *CI = dyn_cast<CallInst>(Inst)) + return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy(); + return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) || + isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) || + isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) || + isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) || + isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst); + } + }; +} + +namespace llvm { +// SimpleValue is POD. +template<> struct isPodLike<SimpleValue> { + static const bool value = true; +}; + +template<> struct DenseMapInfo<SimpleValue> { + static inline SimpleValue getEmptyKey() { + return DenseMapInfo<Instruction*>::getEmptyKey(); + } + static inline SimpleValue getTombstoneKey() { + return DenseMapInfo<Instruction*>::getTombstoneKey(); + } + static unsigned getHashValue(SimpleValue Val); + static bool isEqual(SimpleValue LHS, SimpleValue RHS); +}; +} + +unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) { + Instruction *Inst = Val.Inst; + + // Hash in all of the operands as pointers. 
+ unsigned Res = 0; + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); + + if (CastInst *CI = dyn_cast<CastInst>(Inst)) + Res ^= getHash(CI->getType()); + else if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) + Res ^= CI->getPredicate(); + else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) { + for (ExtractValueInst::idx_iterator I = EVI->idx_begin(), + E = EVI->idx_end(); I != E; ++I) + Res ^= *I; + } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) { + for (InsertValueInst::idx_iterator I = IVI->idx_begin(), + E = IVI->idx_end(); I != E; ++I) + Res ^= *I; + } else { + // nothing extra to hash in. + assert((isa<CallInst>(Inst) || + isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) || + isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) || + isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) && + "Invalid/unknown instruction"); + } + + // Mix in the opcode. + return (Res << 1) ^ Inst->getOpcode(); +} + +bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) { + Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; + + if (LHS.isSentinel() || RHS.isSentinel()) + return LHSI == RHSI; + + if (LHSI->getOpcode() != RHSI->getOpcode()) return false; + return LHSI->isIdenticalTo(RHSI); +} + +//===----------------------------------------------------------------------===// +// CallValue +//===----------------------------------------------------------------------===// + +namespace { + /// CallValue - Instances of this struct represent available call values in + /// the scoped hash table. 
+ struct CallValue { + Instruction *Inst; + + CallValue(Instruction *I) : Inst(I) { + assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); + } + + bool isSentinel() const { + return Inst == DenseMapInfo<Instruction*>::getEmptyKey() || + Inst == DenseMapInfo<Instruction*>::getTombstoneKey(); + } + + static bool canHandle(Instruction *Inst) { + // Don't value number anything that returns void. + if (Inst->getType()->isVoidTy()) + return false; + + CallInst *CI = dyn_cast<CallInst>(Inst); + if (CI == 0 || !CI->onlyReadsMemory()) + return false; + return true; + } + }; +} + +namespace llvm { + // CallValue is POD. + template<> struct isPodLike<CallValue> { + static const bool value = true; + }; + + template<> struct DenseMapInfo<CallValue> { + static inline CallValue getEmptyKey() { + return DenseMapInfo<Instruction*>::getEmptyKey(); + } + static inline CallValue getTombstoneKey() { + return DenseMapInfo<Instruction*>::getTombstoneKey(); + } + static unsigned getHashValue(CallValue Val); + static bool isEqual(CallValue LHS, CallValue RHS); + }; +} +unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { + Instruction *Inst = Val.Inst; + // Hash in all of the operands as pointers. + unsigned Res = 0; + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) { + assert(!Inst->getOperand(i)->getType()->isMetadataTy() && + "Cannot value number calls with metadata operands"); + Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); + } + + // Mix in the opcode. + return (Res << 1) ^ Inst->getOpcode(); +} + +bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) { + Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; + if (LHS.isSentinel() || RHS.isSentinel()) + return LHSI == RHSI; + return LHSI->isIdenticalTo(RHSI); +} + + +//===----------------------------------------------------------------------===// +// EarlyCSE pass. 
+//===----------------------------------------------------------------------===// + +namespace { + +/// EarlyCSE - This pass does a simple depth-first walk over the dominator +/// tree, eliminating trivially redundant instructions and using instsimplify +/// to canonicalize things as it goes. It is intended to be fast and catch +/// obvious cases so that instcombine and other passes are more effective. It +/// is expected that a later pass of GVN will catch the interesting/hard +/// cases. +class EarlyCSE : public FunctionPass { +public: + const TargetData *TD; + DominatorTree *DT; + typedef RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy; + typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>, + AllocatorTy> ScopedHTType; + + /// AvailableValues - This scoped hash table contains the current values of + /// all of our simple scalar expressions. As we walk down the domtree, we + /// look to see if instructions are in this: if so, we replace them with what + /// we find, otherwise we insert them so that dominated values can succeed in + /// their lookup. + ScopedHTType *AvailableValues; + + /// AvailableLoads - This scoped hash table contains the current values + /// of loads. This allows us to get efficient access to dominating loads when + /// we have a fully redundant load. In addition to the most recent load, we + /// keep track of a generation count of the read, which is compared against + /// the current generation count. The current generation count is + /// incremented after every possibly writing memory operation, which ensures + /// that we only CSE loads with other loads that have no intervening store. 
+ typedef RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator; + typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>, + DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType; + LoadHTType *AvailableLoads; + + /// AvailableCalls - This scoped hash table contains the current values + /// of read-only call values. It uses the same generation count as loads. + typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType; + CallHTType *AvailableCalls; + + /// CurrentGeneration - This is the current generation of the memory value. + unsigned CurrentGeneration; + + static char ID; + explicit EarlyCSE() : FunctionPass(ID) { + initializeEarlyCSEPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + +private: + + bool processNode(DomTreeNode *Node); + + // This transformation requires dominator postdominator info + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); + } +}; +} + +char EarlyCSE::ID = 0; + +// createEarlyCSEPass - The public interface to this file. +FunctionPass *llvm::createEarlyCSEPass() { + return new EarlyCSE(); +} + +INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false) + +bool EarlyCSE::processNode(DomTreeNode *Node) { + // Define a scope in the scoped hash table. When we are done processing this + // domtree node and recurse back up to our parent domtree node, this will pop + // off all the values we install. + ScopedHTType::ScopeTy Scope(*AvailableValues); + + // Define a scope for the load values so that anything we add will get + // popped when we recurse back up to our parent domtree node. 
+ LoadHTType::ScopeTy LoadScope(*AvailableLoads); + + // Define a scope for the call values so that anything we add will get + // popped when we recurse back up to our parent domtree node. + CallHTType::ScopeTy CallScope(*AvailableCalls); + + BasicBlock *BB = Node->getBlock(); + + // If this block has a single predecessor, then the predecessor is the parent + // of the domtree node and all of the live out memory values are still current + // in this block. If this block has multiple predecessors, then they could + // have invalidated the live-out memory values of our parent value. For now, + // just be conservative and invalidate memory if this block has multiple + // predecessors. + if (BB->getSinglePredecessor() == 0) + ++CurrentGeneration; + + /// LastStore - Keep track of the last non-volatile store that we saw... for + /// as long as there in no instruction that reads memory. If we see a store + /// to the same location, we delete the dead store. This zaps trivial dead + /// stores which can occur in bitfield code among other things. + StoreInst *LastStore = 0; + + bool Changed = false; + + // See if any instructions in the block can be eliminated. If so, do it. If + // not, add them to AvailableValues. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = I++; + + // Dead instructions should just be removed. + if (isInstructionTriviallyDead(Inst)) { + DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); + Inst->eraseFromParent(); + Changed = true; + ++NumSimplify; + continue; + } + + // If the instruction can be simplified (e.g. X+0 = X) then replace it with + // its simpler value. + if (Value *V = SimplifyInstruction(Inst, TD, DT)) { + DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); + Inst->replaceAllUsesWith(V); + Inst->eraseFromParent(); + Changed = true; + ++NumSimplify; + continue; + } + + // If this is a simple instruction that we can value number, process it. 
+ if (SimpleValue::canHandle(Inst)) { + // See if the instruction has an available value. If so, use it. + if (Value *V = AvailableValues->lookup(Inst)) { + DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n'); + Inst->replaceAllUsesWith(V); + Inst->eraseFromParent(); + Changed = true; + ++NumCSE; + continue; + } + + // Otherwise, just remember that this value is available. + AvailableValues->insert(Inst, Inst); + continue; + } + + // If this is a non-volatile load, process it. + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + // Ignore volatile loads. + if (!LI->isSimple()) { + LastStore = 0; + continue; + } + + // If we have an available version of this load, and if it is the right + // generation, replace this instruction. + std::pair<Value*, unsigned> InVal = + AvailableLoads->lookup(Inst->getOperand(0)); + if (InVal.first != 0 && InVal.second == CurrentGeneration) { + DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: " + << *InVal.first << '\n'); + if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); + Inst->eraseFromParent(); + Changed = true; + ++NumCSELoad; + continue; + } + + // Otherwise, remember that we have this instruction. + AvailableLoads->insert(Inst->getOperand(0), + std::pair<Value*, unsigned>(Inst, CurrentGeneration)); + LastStore = 0; + continue; + } + + // If this instruction may read from memory, forget LastStore. + if (Inst->mayReadFromMemory()) + LastStore = 0; + + // If this is a read-only call, process it. + if (CallValue::canHandle(Inst)) { + // If we have an available version of this call, and if it is the right + // generation, replace this instruction. 
+ std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst); + if (InVal.first != 0 && InVal.second == CurrentGeneration) { + DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: " + << *InVal.first << '\n'); + if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); + Inst->eraseFromParent(); + Changed = true; + ++NumCSECall; + continue; + } + + // Otherwise, remember that we have this instruction. + AvailableCalls->insert(Inst, + std::pair<Value*, unsigned>(Inst, CurrentGeneration)); + continue; + } + + // Okay, this isn't something we can CSE at all. Check to see if it is + // something that could modify memory. If so, our available memory values + // cannot be used so bump the generation count. + if (Inst->mayWriteToMemory()) { + ++CurrentGeneration; + + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // We do a trivial form of DSE if there are two stores to the same + // location with no intervening loads. Delete the earlier store. + if (LastStore && + LastStore->getPointerOperand() == SI->getPointerOperand()) { + DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " + << *Inst << '\n'); + LastStore->eraseFromParent(); + Changed = true; + ++NumDSE; + LastStore = 0; + continue; + } + + // Okay, we just invalidated anything we knew about loaded values. Try + // to salvage *something* by remembering that the stored value is a live + // version of the pointer. It is safe to forward from volatile stores + // to non-volatile loads, so we don't have to check for volatility of + // the store. + AvailableLoads->insert(SI->getPointerOperand(), + std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration)); + + // Remember that this was the last store we saw for DSE. 
+ if (SI->isSimple()) + LastStore = SI; + } + } + } + + unsigned LiveOutGeneration = CurrentGeneration; + for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) { + Changed |= processNode(*I); + // Pop any generation changes off the stack from the recursive walk. + CurrentGeneration = LiveOutGeneration; + } + return Changed; +} + + +bool EarlyCSE::runOnFunction(Function &F) { + TD = getAnalysisIfAvailable<TargetData>(); + DT = &getAnalysis<DominatorTree>(); + + // Tables that the pass uses when walking the domtree. + ScopedHTType AVTable; + AvailableValues = &AVTable; + LoadHTType LoadTable; + AvailableLoads = &LoadTable; + CallHTType CallTable; + AvailableCalls = &CallTable; + + CurrentGeneration = 0; + return processNode(DT->getRootNode()); +}
diff --git a/src/LLVM/lib/Transforms/Scalar/GVN.cpp b/src/LLVM/lib/Transforms/Scalar/GVN.cpp index 1afb6ee..cbfdbcd 100644 --- a/src/LLVM/lib/Transforms/Scalar/GVN.cpp +++ b/src/LLVM/lib/Transforms/Scalar/GVN.cpp
@@ -17,50 +17,45 @@ #define DEBUG_TYPE "gvn" #include "llvm/Transforms/Scalar.h" -#include "llvm/BasicBlock.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" -#include "llvm/Function.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" -#include "llvm/Operator.h" -#include "llvm/Value.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/PHITransAddr.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace PatternMatch; STATISTIC(NumGVNInstr, "Number of instructions deleted"); STATISTIC(NumGVNLoad, "Number of loads deleted"); STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); STATISTIC(NumGVNBlocks, 
"Number of blocks merged"); +STATISTIC(NumGVNSimpl, "Number of instructions simplified"); +STATISTIC(NumGVNEqProp, "Number of equalities propagated"); STATISTIC(NumPRELoad, "Number of loads PRE'd"); static cl::opt<bool> EnablePRE("enable-pre", cl::init(true), cl::Hidden); static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); -static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false)); //===----------------------------------------------------------------------===// // ValueTable Class @@ -71,129 +66,61 @@ /// two values. namespace { struct Expression { - enum ExpressionOpcode { - ADD = Instruction::Add, - FADD = Instruction::FAdd, - SUB = Instruction::Sub, - FSUB = Instruction::FSub, - MUL = Instruction::Mul, - FMUL = Instruction::FMul, - UDIV = Instruction::UDiv, - SDIV = Instruction::SDiv, - FDIV = Instruction::FDiv, - UREM = Instruction::URem, - SREM = Instruction::SRem, - FREM = Instruction::FRem, - SHL = Instruction::Shl, - LSHR = Instruction::LShr, - ASHR = Instruction::AShr, - AND = Instruction::And, - OR = Instruction::Or, - XOR = Instruction::Xor, - TRUNC = Instruction::Trunc, - ZEXT = Instruction::ZExt, - SEXT = Instruction::SExt, - FPTOUI = Instruction::FPToUI, - FPTOSI = Instruction::FPToSI, - UITOFP = Instruction::UIToFP, - SITOFP = Instruction::SIToFP, - FPTRUNC = Instruction::FPTrunc, - FPEXT = Instruction::FPExt, - PTRTOINT = Instruction::PtrToInt, - INTTOPTR = Instruction::IntToPtr, - BITCAST = Instruction::BitCast, - ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, - ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, - FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, - FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, - FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, - SHUFFLE, SELECT, GEP, CALL, CONSTANT, - INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE }; - - ExpressionOpcode opcode; - const Type* type; + uint32_t opcode; + Type *type; SmallVector<uint32_t, 4> varargs; - Value *function; - Expression() { } - 
Expression(ExpressionOpcode o) : opcode(o) { } + Expression(uint32_t o = ~2U) : opcode(o) { } bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; - else if (opcode == EMPTY || opcode == TOMBSTONE) + if (opcode == ~0U || opcode == ~1U) return true; - else if (type != other.type) + if (type != other.type) return false; - else if (function != other.function) + if (varargs != other.varargs) return false; - else { - if (varargs.size() != other.varargs.size()) - return false; - - for (size_t i = 0; i < varargs.size(); ++i) - if (varargs[i] != other.varargs[i]) - return false; - - return true; - } - } - - bool operator!=(const Expression &other) const { - return !(*this == other); + return true; } }; class ValueTable { - private: - DenseMap<Value*, uint32_t> valueNumbering; - DenseMap<Expression, uint32_t> expressionNumbering; - AliasAnalysis* AA; - MemoryDependenceAnalysis* MD; - DominatorTree* DT; + DenseMap<Value*, uint32_t> valueNumbering; + DenseMap<Expression, uint32_t> expressionNumbering; + AliasAnalysis *AA; + MemoryDependenceAnalysis *MD; + DominatorTree *DT; - uint32_t nextValueNumber; + uint32_t nextValueNumber; - Expression::ExpressionOpcode getOpcode(CmpInst* C); - Expression create_expression(BinaryOperator* BO); - Expression create_expression(CmpInst* C); - Expression create_expression(ShuffleVectorInst* V); - Expression create_expression(ExtractElementInst* C); - Expression create_expression(InsertElementInst* V); - Expression create_expression(SelectInst* V); - Expression create_expression(CastInst* C); - Expression create_expression(GetElementPtrInst* G); - Expression create_expression(CallInst* C); - Expression create_expression(Constant* C); - Expression create_expression(ExtractValueInst* C); - Expression create_expression(InsertValueInst* C); - - uint32_t lookup_or_add_call(CallInst* C); - public: - ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value *V); - uint32_t lookup(Value *V) const; - 
void add(Value *V, uint32_t num); - void clear(); - void erase(Value *v); - unsigned size(); - void setAliasAnalysis(AliasAnalysis* A) { AA = A; } - AliasAnalysis *getAliasAnalysis() const { return AA; } - void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } - void setDomTree(DominatorTree* D) { DT = D; } - uint32_t getNextUnusedValueNumber() { return nextValueNumber; } - void verifyRemoved(const Value *) const; + Expression create_expression(Instruction* I); + Expression create_extractvalue_expression(ExtractValueInst* EI); + uint32_t lookup_or_add_call(CallInst* C); + public: + ValueTable() : nextValueNumber(1) { } + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); + void clear(); + void erase(Value *v); + void setAliasAnalysis(AliasAnalysis* A) { AA = A; } + AliasAnalysis *getAliasAnalysis() const { return AA; } + void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } + void setDomTree(DominatorTree* D) { DT = D; } + uint32_t getNextUnusedValueNumber() { return nextValueNumber; } + void verifyRemoved(const Value *) const; }; } namespace llvm { template <> struct DenseMapInfo<Expression> { static inline Expression getEmptyKey() { - return Expression(Expression::EMPTY); + return ~0U; } static inline Expression getTombstoneKey() { - return Expression(Expression::TOMBSTONE); + return ~1U; } static unsigned getHashValue(const Expression e) { @@ -205,20 +132,13 @@ for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) hash = *I + hash * 37; - - hash = ((unsigned)((uintptr_t)e.function >> 4) ^ - (unsigned)((uintptr_t)e.function >> 9)) + - hash * 37; - + return hash; } static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } }; - -template <> -struct isPodLike<Expression> { static const bool value = true; }; } @@ -226,184 +146,73 @@ // ValueTable Internal Functions 
//===----------------------------------------------------------------------===// -Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { - if (isa<ICmpInst>(C)) { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; - case ICmpInst::ICMP_NE: return Expression::ICMPNE; - case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; - case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; - case ICmpInst::ICMP_ULT: return Expression::ICMPULT; - case ICmpInst::ICMP_ULE: return Expression::ICMPULE; - case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; - case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; - case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; - case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; +Expression ValueTable::create_expression(Instruction *I) { + Expression e; + e.type = I->getType(); + e.opcode = I->getOpcode(); + for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + e.varargs.push_back(lookup_or_add(*OI)); + + if (CmpInst *C = dyn_cast<CmpInst>(I)) { + e.opcode = (C->getOpcode() << 8) | C->getPredicate(); + } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { + for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + } + + return e; +} + +Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) { + assert(EI != 0 && "Not an ExtractValueInst?"); + Expression e; + e.type = EI->getType(); + e.opcode = 0; + + IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); + if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { + // EI might be an extract from one of our recognised intrinsics. If it + // is we'll synthesize a semantically equivalent expression instead on + // an extract value expression. 
+ switch (I->getIntrinsicID()) { + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + e.opcode = Instruction::Add; + break; + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + e.opcode = Instruction::Sub; + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + e.opcode = Instruction::Mul; + break; + default: + break; } - } else { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; - case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; - case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; - case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; - case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; - case FCmpInst::FCMP_ONE: return Expression::FCMPONE; - case FCmpInst::FCMP_ORD: return Expression::FCMPORD; - case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; - case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; - case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; - case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; - case FCmpInst::FCMP_ULT: return Expression::FCMPULT; - case FCmpInst::FCMP_ULE: return Expression::FCMPULE; - case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; + + if (e.opcode != 0) { + // Intrinsic recognized. Grab its args to finish building the expression. + assert(I->getNumArgOperands() == 2 && + "Expect two args for recognised intrinsics."); + e.varargs.push_back(lookup_or_add(I->getArgOperand(0))); + e.varargs.push_back(lookup_or_add(I->getArgOperand(1))); + return e; } } -} -Expression ValueTable::create_expression(CallInst* C) { - Expression e; + // Not a recognised intrinsic. Fall back to producing an extract value + // expression. 
+ e.opcode = EI->getOpcode(); + for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end(); + OI != OE; ++OI) + e.varargs.push_back(lookup_or_add(*OI)); - e.type = C->getType(); - e.function = C->getCalledFunction(); - e.opcode = Expression::CALL; - - CallSite CS(C); - for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - e.varargs.push_back(lookup_or_add(*I)); - - return e; -} - -Expression ValueTable::create_expression(BinaryOperator* BO) { - Expression e; - e.varargs.push_back(lookup_or_add(BO->getOperand(0))); - e.varargs.push_back(lookup_or_add(BO->getOperand(1))); - e.function = 0; - e.type = BO->getType(); - e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode()); - - return e; -} - -Expression ValueTable::create_expression(CmpInst* C) { - Expression e; - - e.varargs.push_back(lookup_or_add(C->getOperand(0))); - e.varargs.push_back(lookup_or_add(C->getOperand(1))); - e.function = 0; - e.type = C->getType(); - e.opcode = getOpcode(C); - - return e; -} - -Expression ValueTable::create_expression(CastInst* C) { - Expression e; - - e.varargs.push_back(lookup_or_add(C->getOperand(0))); - e.function = 0; - e.type = C->getType(); - e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode()); - - return e; -} - -Expression ValueTable::create_expression(ShuffleVectorInst* S) { - Expression e; - - e.varargs.push_back(lookup_or_add(S->getOperand(0))); - e.varargs.push_back(lookup_or_add(S->getOperand(1))); - e.varargs.push_back(lookup_or_add(S->getOperand(2))); - e.function = 0; - e.type = S->getType(); - e.opcode = Expression::SHUFFLE; - - return e; -} - -Expression ValueTable::create_expression(ExtractElementInst* E) { - Expression e; - - e.varargs.push_back(lookup_or_add(E->getOperand(0))); - e.varargs.push_back(lookup_or_add(E->getOperand(1))); - e.function = 0; - e.type = E->getType(); - e.opcode = Expression::EXTRACT; - - return e; -} - -Expression ValueTable::create_expression(InsertElementInst* I) { - 
Expression e; - - e.varargs.push_back(lookup_or_add(I->getOperand(0))); - e.varargs.push_back(lookup_or_add(I->getOperand(1))); - e.varargs.push_back(lookup_or_add(I->getOperand(2))); - e.function = 0; - e.type = I->getType(); - e.opcode = Expression::INSERT; - - return e; -} - -Expression ValueTable::create_expression(SelectInst* I) { - Expression e; - - e.varargs.push_back(lookup_or_add(I->getCondition())); - e.varargs.push_back(lookup_or_add(I->getTrueValue())); - e.varargs.push_back(lookup_or_add(I->getFalseValue())); - e.function = 0; - e.type = I->getType(); - e.opcode = Expression::SELECT; - - return e; -} - -Expression ValueTable::create_expression(GetElementPtrInst* G) { - Expression e; - - e.varargs.push_back(lookup_or_add(G->getPointerOperand())); - e.function = 0; - e.type = G->getType(); - e.opcode = Expression::GEP; - - for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); - I != E; ++I) - e.varargs.push_back(lookup_or_add(*I)); - - return e; -} - -Expression ValueTable::create_expression(ExtractValueInst* E) { - Expression e; - - e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); - for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) + for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end(); + II != IE; ++II) e.varargs.push_back(*II); - e.function = 0; - e.type = E->getType(); - e.opcode = Expression::EXTRACTVALUE; - - return e; -} - -Expression ValueTable::create_expression(InsertValueInst* E) { - Expression e; - - e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); - e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand())); - for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - e.function = 0; - e.type = E->getType(); - e.opcode = Expression::INSERTVALUE; return e; } @@ -470,21 +279,19 @@ // Non-local case. 
const MemoryDependenceAnalysis::NonLocalDepInfo &deps = MD->getNonLocalCallDependency(CallSite(C)); - // FIXME: call/call dependencies for readonly calls should return def, not - // clobber! Move the checking logic to MemDep! + // FIXME: Move the checking logic to MemDep! CallInst* cdep = 0; // Check to see if we have a single dominating call instruction that is // identical to C. for (unsigned i = 0, e = deps.size(); i != e; ++i) { const NonLocalDepEntry *I = &deps[i]; - // Ignore non-local dependencies. if (I->getResult().isNonLocal()) continue; - // We don't handle non-depedencies. If we already have a call, reject + // We don't handle non-definitions. If we already have a call, reject // instruction dependencies. - if (I->getResult().isClobber() || cdep != 0) { + if (!I->getResult().isDef() || cdep != 0) { cdep = 0; break; } @@ -563,12 +370,8 @@ case Instruction::And: case Instruction::Or : case Instruction::Xor: - exp = create_expression(cast<BinaryOperator>(I)); - break; case Instruction::ICmp: case Instruction::FCmp: - exp = create_expression(cast<CmpInst>(I)); - break; case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -581,28 +384,16 @@ case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: - exp = create_expression(cast<CastInst>(I)); - break; case Instruction::Select: - exp = create_expression(cast<SelectInst>(I)); - break; case Instruction::ExtractElement: - exp = create_expression(cast<ExtractElementInst>(I)); - break; case Instruction::InsertElement: - exp = create_expression(cast<InsertElementInst>(I)); - break; case Instruction::ShuffleVector: - exp = create_expression(cast<ShuffleVectorInst>(I)); + case Instruction::InsertValue: + case Instruction::GetElementPtr: + exp = create_expression(I); break; case Instruction::ExtractValue: - exp = create_expression(cast<ExtractValueInst>(I)); - break; - case Instruction::InsertValue: - exp = create_expression(cast<InsertValueInst>(I)); - break; - case 
Instruction::GetElementPtr: - exp = create_expression(cast<GetElementPtrInst>(I)); + exp = create_extractvalue_expression(cast<ExtractValueInst>(I)); break; default: valueNumbering[V] = nextValueNumber; @@ -623,14 +414,14 @@ return VI->second; } -/// clear - Remove all entries from the ValueTable +/// clear - Remove all entries from the ValueTable. void ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); nextValueNumber = 1; } -/// erase - Remove a value from the value numbering +/// erase - Remove a value from the value numbering. void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -649,30 +440,89 @@ //===----------------------------------------------------------------------===// namespace { - struct ValueNumberScope { - ValueNumberScope* parent; - DenseMap<uint32_t, Value*> table; - - ValueNumberScope(ValueNumberScope* p) : parent(p) { } - }; -} - -namespace { class GVN : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { } - - private: bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; - + const TargetData *TD; + ValueTable VN; - DenseMap<BasicBlock*, ValueNumberScope*> localAvail; + + /// LeaderTable - A mapping from value numbers to lists of Value*'s that + /// have that value number. Use findLeader to query it. 
+ struct LeaderTableEntry { + Value *Val; + BasicBlock *BB; + LeaderTableEntry *Next; + }; + DenseMap<uint32_t, LeaderTableEntry> LeaderTable; + BumpPtrAllocator TableAllocator; + + SmallVector<Instruction*, 8> InstrsToErase; + public: + static char ID; // Pass identification, replacement for typeid + explicit GVN(bool noloads = false) + : FunctionPass(ID), NoLoads(noloads), MD(0) { + initializeGVNPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + + /// markInstructionForDeletion - This removes the specified instruction from + /// our various maps and marks it for deletion. + void markInstructionForDeletion(Instruction *I) { + VN.erase(I); + InstrsToErase.push_back(I); + } + + const TargetData *getTargetData() const { return TD; } + DominatorTree &getDominatorTree() const { return *DT; } + AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } + MemoryDependenceAnalysis &getMemDep() const { return *MD; } + private: + /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for + /// its value number. + void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + LeaderTableEntry &Curr = LeaderTable[N]; + if (!Curr.Val) { + Curr.Val = V; + Curr.BB = BB; + return; + } + + LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>(); + Node->Val = V; + Node->BB = BB; + Node->Next = Curr.Next; + Curr.Next = Node; + } + + /// removeFromLeaderTable - Scan the list of values corresponding to a given + /// value number, and remove the given value if encountered. 
+ void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + LeaderTableEntry* Prev = 0; + LeaderTableEntry* Curr = &LeaderTable[N]; + + while (Curr->Val != V || Curr->BB != BB) { + Prev = Curr; + Curr = Curr->Next; + } + + if (Prev) { + Prev->Next = Curr->Next; + } else { + if (!Curr->Next) { + Curr->Val = 0; + Curr->BB = 0; + } else { + LeaderTableEntry* Next = Curr->Next; + Curr->Val = Next->Val; + Curr->BB = Next->BB; + Curr->Next = Next->Next; + } + } + } // List of critical edges to be split between iterations. SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; @@ -687,24 +537,24 @@ AU.addPreserved<DominatorTree>(); AU.addPreserved<AliasAnalysis>(); } + // Helper fuctions // FIXME: eliminate or document these better - bool processLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); - bool processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase); - bool processNonLocalLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); + bool processLoad(LoadInst *L); + bool processInstruction(Instruction *I); + bool processNonLocalLoad(LoadInst *L); bool processBlock(BasicBlock *BB); - void dump(DenseMap<uint32_t, Value*>& d); + void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); - Value *CollapsePhi(PHINode* p); - bool performPRE(Function& F); - Value *lookupNumber(BasicBlock *BB, uint32_t num); + bool performPRE(Function &F); + Value *findLeader(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); + unsigned replaceAllDominatedUsesWith(Value *From, Value *To, + BasicBlock *Root); + bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root); }; char GVN::ID = 0; @@ -715,7 +565,11 @@ return new GVN(NoLoads); } -INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false); +INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) 
+INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; @@ -727,33 +581,6 @@ errs() << "}\n"; } -static bool isSafeReplacement(PHINode* p, Instruction *inst) { - if (!isa<PHINode>(inst)) - return true; - - for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); - UI != E; ++UI) - if (PHINode* use_phi = dyn_cast<PHINode>(*UI)) - if (use_phi->getParent() == inst->getParent()) - return false; - - return true; -} - -Value *GVN::CollapsePhi(PHINode *PN) { - Value *ConstVal = PN->hasConstantValue(DT); - if (!ConstVal) return 0; - - Instruction *Inst = dyn_cast<Instruction>(ConstVal); - if (!Inst) - return ConstVal; - - if (DT->dominates(Inst, PN)) - if (isSafeReplacement(PN, Inst)) - return Inst; - return 0; -} - /// IsValueFullyAvailableInBlock - Return true if we can prove that the value /// we're analyzing is fully available in the specified block. As we go, keep /// track of which blocks we know are fully alive in FullyAvailableBlocks. This @@ -835,7 +662,7 @@ /// CanCoerceMustAliasedValueToLoad - Return true if /// CoerceAvailableValueToLoadType will succeed. static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, - const Type *LoadTy, + Type *LoadTy, const TargetData &TD) { // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. @@ -860,40 +687,40 @@ /// /// If we can't do it, return null. static Value *CoerceAvailableValueToLoadType(Value *StoredVal, - const Type *LoadedTy, + Type *LoadedTy, Instruction *InsertPt, const TargetData &TD) { if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; - const Type *StoredValTy = StoredVal->getType(); + // If this is already the right type, just return it. 
+ Type *StoredValTy = StoredVal->getType(); - uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); + uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { - if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) { - // Pointer to Pointer -> use bitcast. - return new BitCastInst(StoredVal, LoadedTy, InsertPt); - } + // Pointer to Pointer -> use bitcast. + if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) + return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); // Convert source pointers to integers, which can be bitcast. if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); - StoredVal = new PtrToIntInst(StoredVal, StoredValTy, InsertPt); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } - const Type *TypeToCastTo = LoadedTy; + Type *TypeToCastTo = LoadedTy; if (TypeToCastTo->isPointerTy()) TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); if (StoredValTy != TypeToCastTo) - StoredVal = new BitCastInst(StoredVal, TypeToCastTo, InsertPt); + StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); // Cast to pointer if the load needs a pointer type. if (LoadedTy->isPointerTy()) - StoredVal = new IntToPtrInst(StoredVal, LoadedTy, InsertPt); + StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); return StoredVal; } @@ -906,78 +733,37 @@ // Convert source pointers to integers, which can be manipulated. if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); - StoredVal = new PtrToIntInst(StoredVal, StoredValTy, InsertPt); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } // Convert vectors and fp to integer, which can be manipulated. 
if (!StoredValTy->isIntegerTy()) { StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); - StoredVal = new BitCastInst(StoredVal, StoredValTy, InsertPt); + StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); } // If this is a big-endian system, we need to shift the value down to the low // bits so that a truncate will work. if (TD.isBigEndian()) { Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); - StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, InsertPt); + StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); } // Truncate the integer to the right size now. - const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); - StoredVal = new TruncInst(StoredVal, NewIntTy, InsertPt); + Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); + StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); if (LoadedTy == NewIntTy) return StoredVal; // If the result is a pointer, inttoptr. if (LoadedTy->isPointerTy()) - return new IntToPtrInst(StoredVal, LoadedTy, InsertPt); + return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); // Otherwise, bitcast. - return new BitCastInst(StoredVal, LoadedTy, InsertPt); + return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); } -/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can -/// be expressed as a base pointer plus a constant offset. Return the base and -/// offset to the caller. -static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const TargetData &TD) { - Operator *PtrOp = dyn_cast<Operator>(Ptr); - if (PtrOp == 0) return Ptr; - - // Just look through bitcasts. - if (PtrOp->getOpcode() == Instruction::BitCast) - return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); - - // If this is a GEP with constant indices, we can look through it. 
- GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp); - if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; - ++I, ++GTI) { - ConstantInt *OpC = cast<ConstantInt>(*I); - if (OpC->isZero()) continue; - - // Handle a struct and array indices which add their offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += OpC->getSExtValue()*Size; - } - } - - // Re-sign extend from the pointer size if needed to get overflow edge cases - // right. - unsigned PtrSize = TD.getPointerSizeInBits(); - if (PtrSize < 64) - Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); - - return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); -} - - /// AnalyzeLoadFromClobberingWrite - This function is called when we have a /// memdep query of a load that ends up being a clobbering memory write (store, /// memset, memcpy, memmove). This means that the write *may* provide bits used @@ -986,7 +772,7 @@ /// Check this case to see if there is anything more we can do before we give /// up. This returns -1 if we have to give up, or a byte number in the stored /// value of the piece that feeds the load. 
-static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, Value *WritePtr, uint64_t WriteSizeInBits, const TargetData &TD) { @@ -996,9 +782,8 @@ return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD); - Value *LoadBase = - GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD); + Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); if (StoreBase != LoadBase) return -1; @@ -1020,8 +805,6 @@ // If the load and store don't overlap at all, the store doesn't provide // anything to the load. In this case, they really don't alias at all, AA // must have gotten confused. - // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then - // remove this check, as it is duplicated with what we have below. uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); if ((WriteSizeInBits & 7) | (LoadSize & 7)) @@ -1063,21 +846,51 @@ /// AnalyzeLoadFromClobberingStore - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. -static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const TargetData &TD) { // Cannot handle reading from store of first-class aggregate yet. 
- if (DepSI->getOperand(0)->getType()->isStructTy() || - DepSI->getOperand(0)->getType()->isArrayTy()) + if (DepSI->getValueOperand()->getType()->isStructTy() || + DepSI->getValueOperand()->getType()->isArrayTy()) return -1; Value *StorePtr = DepSI->getPointerOperand(); - uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType()); return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize, TD); } -static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, +/// AnalyzeLoadFromClobberingLoad - This function is called when we have a +/// memdep query of a load that ends up being clobbered by another load. See if +/// the other load can feed into the second load. +static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, + LoadInst *DepLI, const TargetData &TD){ + // Cannot handle reading from store of first-class aggregate yet. + if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) + return -1; + + Value *DepPtr = DepLI->getPointerOperand(); + uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType()); + int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD); + if (R != -1) return R; + + // If we have a load/load clobber an DepLI can be widened to cover this load, + // then we should widen it! + int64_t LoadOffs = 0; + const Value *LoadBase = + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD); + if (Size == 0) return -1; + + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD); +} + + + +static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, MemIntrinsic *MI, const TargetData &TD) { // If the mem operation is a non-constant size, we can't handle it. 
@@ -1099,7 +912,7 @@ Constant *Src = dyn_cast<Constant>(MTI->getSource()); if (Src == 0) return -1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject()); + GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD)); if (GV == 0 || !GV->isConstant()) return -1; // See if the access is within the bounds of the transfer. @@ -1114,7 +927,7 @@ llvm::Type::getInt8PtrTy(Src->getContext())); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); if (ConstantFoldLoadFromConstPtr(Src, &TD)) return Offset; @@ -1124,11 +937,11 @@ /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means -/// that the store *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. +/// that the store provides bits used by the load but we the pointers don't +/// mustalias. Check this case to see if there is anything more we can do +/// before we give up. static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, - const Type *LoadTy, + Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = SrcVal->getType()->getContext(); @@ -1160,10 +973,73 @@ return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering load. This means +/// that the load *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. 
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, + Type *LoadTy, Instruction *InsertPt, + GVN &gvn) { + const TargetData &TD = *gvn.getTargetData(); + // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to + // widen SrcVal out to a larger load. + unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType()); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + if (Offset+LoadSize > SrcValSize) { + assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); + assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); + // If we have a load/load clobber an DepLI can be widened to cover this + // load, then we should widen it to the next power of 2 size big enough! + unsigned NewLoadSize = Offset+LoadSize; + if (!isPowerOf2_32(NewLoadSize)) + NewLoadSize = NextPowerOf2(NewLoadSize); + + Value *PtrVal = SrcVal->getPointerOperand(); + + // Insert the new load after the old load. This ensures that subsequent + // memdep queries will find the new load. We can't easily remove the old + // load completely because it is already in the value numbering table. + IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); + Type *DestPTy = + IntegerType::get(LoadTy->getContext(), NewLoadSize*8); + DestPTy = PointerType::get(DestPTy, + cast<PointerType>(PtrVal->getType())->getAddressSpace()); + Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); + PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + LoadInst *NewLoad = Builder.CreateLoad(PtrVal); + NewLoad->takeName(SrcVal); + NewLoad->setAlignment(SrcVal->getAlignment()); + + DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); + DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); + + // Replace uses of the original load with the wider load. On a big endian + // system, we need to shift down to get the relevant bits. 
+ Value *RV = NewLoad; + if (TD.isBigEndian()) + RV = Builder.CreateLShr(RV, + NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); + RV = Builder.CreateTrunc(RV, SrcVal->getType()); + SrcVal->replaceAllUsesWith(RV); + + // We would like to use gvn.markInstructionForDeletion here, but we can't + // because the load is already memoized into the leader map table that GVN + // tracks. It is potentially possible to remove the load from the table, + // but then there all of the operations based on it would need to be + // rehashed. Just leave the dead load around. + gvn.getMemDep().removeInstruction(SrcVal); + SrcVal = NewLoad; + } + + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD); +} + + /// GetMemInstValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, - const Type *LoadTy, Instruction *InsertPt, + Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = LoadTy->getContext(); uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; @@ -1210,7 +1086,7 @@ llvm::Type::getInt8PtrTy(Src->getContext())); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); return ConstantFoldLoadFromConstPtr(Src, &TD); } @@ -1222,11 +1098,12 @@ BasicBlock *BB; enum ValType { SimpleVal, // A simple offsetted value that is accessed. + LoadVal, // A value produced by a load. MemIntrin // A memory intrinsic which is loaded from. }; /// V - The value that is live out of the block. - PointerIntPair<Value *, 1, ValType> Val; + PointerIntPair<Value *, 2, ValType> Val; /// Offset - The byte offset in Val that is interesting for the load query. 
unsigned Offset; @@ -1251,37 +1128,69 @@ return Res; } + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(LI); + Res.Val.setInt(LoadVal); + Res.Offset = Offset; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } + bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); return Val.getPointer(); } + LoadInst *getCoercedLoadValue() const { + assert(isCoercedLoadValue() && "Wrong accessor"); + return cast<LoadInst>(Val.getPointer()); + } + MemIntrinsic *getMemIntrinValue() const { - assert(!isSimpleValue() && "Wrong accessor"); + assert(isMemIntrinValue() && "Wrong accessor"); return cast<MemIntrinsic>(Val.getPointer()); } /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. 
- Value *MaterializeAdjustedValue(const Type *LoadTy, - const TargetData *TD) const { + Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { Value *Res; if (isSimpleValue()) { Res = getSimpleValue(); if (Res->getType() != LoadTy) { + const TargetData *TD = gvn.getTargetData(); assert(TD && "Need target data to handle type mismatch case"); Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); } + } else if (isCoercedLoadValue()) { + LoadInst *Load = getCoercedLoadValue(); + if (Load->getType() == LoadTy && Offset == 0) { + Res = Load; + } else { + Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), + gvn); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " + << *getCoercedLoadValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } } else { + const TargetData *TD = gvn.getTargetData(); + assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); } @@ -1289,28 +1198,27 @@ } }; -} +} // end anonymous namespace /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, - const TargetData *TD, - const DominatorTree &DT, - AliasAnalysis *AA) { + GVN &gvn) { // Check for the fully redundant, dominating load case. 
In this case, we can // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && - DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) - return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD); + gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, + LI->getParent())) + return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); // Otherwise, we have to construct SSA form. SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSAUpdate(&NewPHIs); - SSAUpdate.Initialize(LI); + SSAUpdate.Initialize(LI->getType(), LI->getName()); - const Type *LoadTy = LI->getType(); + Type *LoadTy = LI->getType(); for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { const AvailableValueInBlock &AV = ValuesPerBlock[i]; @@ -1319,16 +1227,30 @@ if (SSAUpdate.HasValueForBlock(BB)) continue; - SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD)); + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); } // Perform PHI construction. Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); // If new PHI nodes were created, notify alias analysis. - if (V->getType()->isPointerTy()) + if (V->getType()->isPointerTy()) { + AliasAnalysis *AA = gvn.getAliasAnalysis(); + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); + + // Now that we've copied information to the new PHIs, scan through + // them again and inform alias analysis that we've added potentially + // escaping uses to any values that are operands to these PHIs. + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { + PHINode *P = NewPHIs[i]; + for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) { + unsigned jj = PHINode::getOperandNumForIncomingValue(ii); + AA->addEscapingUse(P->getOperandUse(jj)); + } + } + } return V; } @@ -1341,12 +1263,11 @@ /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. 
-bool GVN::processNonLocalLoad(LoadInst *LI, - SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processNonLocalLoad(LoadInst *LI) { // Find the non-local dependencies of the load. SmallVector<NonLocalDepResult, 64> Deps; - MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), - Deps); + AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); + MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " // << Deps.size() << *LI << '\n'); @@ -1358,11 +1279,13 @@ // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { + if (Deps.size() == 1 + && !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) + { DEBUG( dbgs() << "GVN: non-local load "; WriteAsOperand(dbgs(), LI); - dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; + dbgs() << " has unknown dependencies\n"; ); return false; } @@ -1374,12 +1297,15 @@ SmallVector<AvailableValueInBlock, 16> ValuesPerBlock; SmallVector<BasicBlock*, 16> UnavailableBlocks; - const TargetData *TD = 0; - for (unsigned i = 0, e = Deps.size(); i != e; ++i) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); + if (!DepInfo.isDef() && !DepInfo.isClobber()) { + UnavailableBlocks.push_back(DepBB); + continue; + } + if (DepInfo.isClobber()) { // The address being loaded in this non-local block may not be the same as // the pointer operand of the load if PHI translation occurs. Make sure @@ -1390,25 +1316,41 @@ // read by the load, we can extract the bits we need for the load from the // stored value. 
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { - if (TD == 0) - TD = getAnalysisIfAvailable<TargetData>(); if (TD && Address) { int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI, *TD); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, - DepSI->getOperand(0), + DepSI->getValueOperand(), Offset)); continue; } } } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI != LI && Address && TD) { + int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), + LI->getPointerOperand(), + DepLI, *TD); + + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, + Offset)); + continue; + } + } + } // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { - if (TD == 0) - TD = getAnalysisIfAvailable<TargetData>(); if (TD && Address) { int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, DepMI, *TD); @@ -1424,10 +1366,12 @@ continue; } + // DepInfo.isDef() here + Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa<AllocaInst>(DepInst) || + if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) || // Loading immediately after lifetime begin -> undef. isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, @@ -1438,13 +1382,10 @@ if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { // Reject loads and stores that are to the same address but are of // different types if we have to. 
- if (S->getOperand(0)->getType() != LI->getType()) { - if (TD == 0) - TD = getAnalysisIfAvailable<TargetData>(); - + if (S->getValueOperand()->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0), + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(), *TD)) { UnavailableBlocks.push_back(DepBB); continue; @@ -1452,16 +1393,13 @@ } ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, - S->getOperand(0))); + S->getValueOperand())); continue; } if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { // If the types mismatch and we can't handle it, reject reuse of the load. if (LD->getType() != LI->getType()) { - if (TD == 0) - TD = getAnalysisIfAvailable<TargetData>(); - // If the stored value is larger or equal to the loaded value, we can // reuse it. if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ @@ -1469,7 +1407,7 @@ continue; } } - ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); continue; } @@ -1488,16 +1426,14 @@ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumGVNLoad; return true; } @@ -1517,8 +1453,8 @@ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) Blockers.insert(UnavailableBlocks[i]); - // Lets find first basic block with more than one predecessor. Walk backwards - // through predecessors if needed. + // Let's find the first basic block with more than one predecessor. 
Walk + // backwards through predecessors if needed. BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; @@ -1531,26 +1467,19 @@ return false; if (Blockers.count(TmpBB)) return false; + + // If any of these blocks has more than one successor (i.e. if the edge we + // just traversed was critical), then there are other paths through this + // block along which the load may not be anticipated. Hoisting the load + // above this block would be adding the load to execution paths along + // which it was not previously executed. if (TmpBB->getTerminator()->getNumSuccessors() != 1) - allSingleSucc = false; + return false; } assert(TmpBB); LoadBB = TmpBB; - // If we have a repl set with LI itself in it, this means we have a loop where - // at least one of the values is LI. Since this means that we won't be able - // to eliminate LI even if we insert uses in the other predecessors, we will - // end up increasing code size. Reject this by scanning for LI. - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - if (ValuesPerBlock[i].isSimpleValue() && - ValuesPerBlock[i].getSimpleValue() == LI) { - // Skip cases where LI is the only definition, even for EnableFullLoadPRE. - if (!EnableFullLoadPRE || e == 1) - return false; - } - } - // FIXME: It is extremely unclear what this loop is doing, other than // artificially restricting loadpre. 
if (isSinglePred) { @@ -1597,10 +1526,19 @@ << Pred->getName() << "': " << *LI << '\n'); return false; } + + if (LoadBB->isLandingPad()) { + DEBUG(dbgs() + << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '" + << Pred->getName() << "': " << *LI << '\n'); + return false; + } + unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB); NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); } } + if (!NeedToSplit.empty()) { toSplit.append(NeedToSplit.begin(), NeedToSplit.end()); return false; @@ -1610,14 +1548,13 @@ unsigned NumUnavailablePreds = PredLoads.size(); assert(NumUnavailablePreds != 0 && "Fully available value should be eliminated above!"); - if (!EnableFullLoadPRE) { - // If this load is unavailable in multiple predecessors, reject it. - // FIXME: If we could restructure the CFG, we could make a common pred with - // all the preds that don't have an available LI and insert a new load into - // that one block. - if (NumUnavailablePreds != 1) + + // If this load is unavailable in multiple predecessors, reject it. + // FIXME: If we could restructure the CFG, we could make a common pred with + // all the preds that don't have an available LI and insert a new load into + // that one block. + if (NumUnavailablePreds != 1) return false; - } // Check if the load can safely be moved to all the unavailable predecessors. bool CanDoPRE = true; @@ -1632,7 +1569,7 @@ // If all preds have a single successor, then we know it is safe to insert // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. - PHITransAddr Address(LI->getOperand(0), TD); + PHITransAddr Address(LI->getPointerOperand(), TD); Value *LoadPtr = 0; if (allSingleSucc) { LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, @@ -1646,7 +1583,7 @@ // we fail PRE. 
if (LoadPtr == 0) { DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " - << *LI->getOperand(0) << "\n"); + << *LI->getPointerOperand() << "\n"); CanDoPRE = false; break; } @@ -1655,8 +1592,8 @@ // @1 = getelementptr (i8* p, ... // test p and branch if == 0 // load @1 - // It is valid to have the getelementptr before the test, even if p can be 0, - // as getelementptr only does address arithmetic. + // It is valid to have the getelementptr before the test, even if p can + // be 0, as getelementptr only does address arithmetic. // If we are not pushing the value through any multiple-successor blocks // we do not have this case. Otherwise, check that the load is safe to // put anywhere; this can be improved, but should be conservatively safe. @@ -1673,8 +1610,11 @@ } if (!CanDoPRE) { - while (!NewInsts.empty()) - NewInsts.pop_back_val()->eraseFromParent(); + while (!NewInsts.empty()) { + Instruction *I = NewInsts.pop_back_val(); + if (MD) MD->removeInstruction(I); + I->eraseFromParent(); + } return false; } @@ -1700,9 +1640,16 @@ BasicBlock *UnavailablePred = I->first; Value *LoadPtr = I->second; - Value *NewLoad = new LoadInst(LoadPtr, false, - LI->getAlignment(), - UnavailablePred->getTerminator()); + Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, + LI->getAlignment(), + UnavailablePred->getTerminator()); + + // Transfer the old load's TBAA tag to the new load. + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) + NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + + // Transfer DebugLoc. + NewLoad->setDebugLoc(LI->getDebugLoc()); // Add the newly created load. ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, @@ -1712,33 +1659,37 @@ } // Perform PHI construction. 
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumPRELoad; return true; } /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. -bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processLoad(LoadInst *L) { if (!MD) return false; - if (L->isVolatile()) + if (!L->isSimple()) return false; + if (L->use_empty()) { + markInstructionForDeletion(L); + return true; + } + // ... to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); - // If the value isn't available, don't do anything! - if (Dep.isClobber()) { + // If we have a clobber and target data is around, see if this is a clobber + // that we can fix up through code synthesis. + if (Dep.isClobber() && TD) { // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1750,26 +1701,40 @@ // completely covers this load. This sort of thing can happen in bitfield // access code. 
Value *AvailVal = 0; - if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) - if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { - int Offset = AnalyzeLoadFromClobberingStore(L->getType(), - L->getPointerOperand(), - DepSI, *TD); - if (Offset != -1) - AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, - L->getType(), L, *TD); - } + if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, + L->getType(), L, *TD); + } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI == L) + return false; + + int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), + L->getPointerOperand(), + DepLI, *TD); + if (Offset != -1) + AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); + } // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. 
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { - if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { - int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), - L->getPointerOperand(), - DepMI, *TD); - if (Offset != -1) - AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); - } + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); } if (AvailVal) { @@ -1780,14 +1745,16 @@ L->replaceAllUsesWith(AvailVal); if (AvailVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } - + } + + // If the value isn't available, don't do anything! + if (Dep.isClobber()) { DEBUG( - // fast print dep, using operator<< on instruction would be too slow + // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; WriteAsOperand(dbgs(), L); Instruction *I = Dep.getInst(); @@ -1798,18 +1765,27 @@ // If it is defined in another block, try harder. if (Dep.isNonLocal()) - return processNonLocalLoad(L, toErase); + return processNonLocalLoad(L); + + if (!Dep.isDef()) { + DEBUG( + // fast print dep, using operator<< on instruction is too slow. + dbgs() << "GVN: load "; + WriteAsOperand(dbgs(), L); + dbgs() << " has unknown dependence\n"; + ); + return false; + } Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { - Value *StoredVal = DepSI->getOperand(0); + Value *StoredVal = DepSI->getValueOperand(); // The store and load are to a must-aliased pointer, but they may not // actually have the same type. See if we know how to reuse the stored // value (depending on its type). 
- const TargetData *TD = 0; if (StoredVal->getType() != L->getType()) { - if ((TD = getAnalysisIfAvailable<TargetData>())) { + if (TD) { StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, *TD); if (StoredVal == 0) @@ -1826,8 +1802,7 @@ L->replaceAllUsesWith(StoredVal); if (StoredVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1838,10 +1813,10 @@ // The loads are of a must-aliased pointer, but they may not actually have // the same type. See if we know how to reuse the previously loaded value // (depending on its type). - const TargetData *TD = 0; if (DepLI->getType() != L->getType()) { - if ((TD = getAnalysisIfAvailable<TargetData>())) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + if (TD) { + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), + L, *TD); if (AvailableVal == 0) return false; @@ -1856,8 +1831,7 @@ L->replaceAllUsesWith(AvailableVal); if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1865,21 +1839,19 @@ // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa<AllocaInst>(DepInst)) { + if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. 
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1888,110 +1860,267 @@ return false; } -Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { - DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB); - if (I == localAvail.end()) - return 0; - - ValueNumberScope *Locals = I->second; - while (Locals) { - DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num); - if (I != Locals->table.end()) - return I->second; - Locals = Locals->parent; +// findLeader - In order to find a leader for a given value number at a +// specific basic block, we first obtain the list of all Values for that number, +// and then scan the list to find one whose block dominates the block in +// question. This is fast because dominator tree queries consist of only +// a few comparisons of DFS numbers. +Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { + LeaderTableEntry Vals = LeaderTable[num]; + if (!Vals.Val) return 0; + + Value *Val = 0; + if (DT->dominates(Vals.BB, BB)) { + Val = Vals.Val; + if (isa<Constant>(Val)) return Val; + } + + LeaderTableEntry* Next = Vals.Next; + while (Next) { + if (DT->dominates(Next->BB, BB)) { + if (isa<Constant>(Next->Val)) return Next->Val; + if (!Val) Val = Next->Val; + } + + Next = Next->Next; } - return 0; + return Val; } +/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the +/// use is dominated by the given basic block. Returns the number of uses that +/// were replaced. 
+unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, + BasicBlock *Root) { + unsigned Count = 0; + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE; ) { + Instruction *User = cast<Instruction>(*UI); + unsigned OpNum = UI.getOperandNo(); + ++UI; -/// processInstruction - When calculating availability, handle an instruction -/// by inserting it into the appropriate sets -bool GVN::processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase) { - // Ignore dbg info intrinsics. - if (ISA_DEBUG_INFO_INTRINSIC(I)) + if (DT->dominates(Root, User->getParent())) { + User->setOperand(OpNum, To); + ++Count; + } + } + return Count; +} + +/// propagateEquality - The given values are known to be equal in every block +/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with +/// 'RHS' everywhere in the scope. Returns whether a change was made. +bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { + if (LHS == RHS) return false; + assert(LHS->getType() == RHS->getType() && "Equal but types differ!"); + + // Don't try to propagate equalities between constants. + if (isa<Constant>(LHS) && isa<Constant>(RHS)) return false; - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - bool Changed = processLoad(LI, toErase); + // Make sure that any constants are on the right-hand side. In general the + // best results are obtained by placing the longest lived value on the RHS. + if (isa<Constant>(LHS)) + std::swap(LHS, RHS); - if (!Changed) { - unsigned Num = VN.lookup_or_add(LI); - localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI)); - } + // If neither term is constant then bail out. This is not for correctness, + // it's just that the non-constant case is much less useful: it occurs just + // as often as the constant case but handling it hardly ever results in an + // improvement. 
+ if (!isa<Constant>(RHS)) + return false; + // If value numbering later deduces that an instruction in the scope is equal + // to 'LHS' then ensure it will be turned into 'RHS'. + addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root); + + // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. + unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root); + bool Changed = NumReplacements > 0; + NumGVNEqProp += NumReplacements; + + // Now try to deduce additional equalities from this one. For example, if the + // known equality was "(A != B)" == "false" then it follows that A and B are + // equal in the scope. Only boolean equalities with an explicit true or false + // RHS are currently supported. + if (!RHS->getType()->isIntegerTy(1)) + // Not a boolean equality - bail out. + return Changed; + ConstantInt *CI = dyn_cast<ConstantInt>(RHS); + if (!CI) + // RHS neither 'true' nor 'false' - bail out. + return Changed; + // Whether RHS equals 'true'. Otherwise it equals 'false'. + bool isKnownTrue = CI->isAllOnesValue(); + bool isKnownFalse = !isKnownTrue; + + // If "A && B" is known true then both A and B are known true. If "A || B" + // is known false then both A and B are known false. + Value *A, *B; + if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || + (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { + Changed |= propagateEquality(A, RHS, Root); + Changed |= propagateEquality(B, RHS, Root); return Changed; } - uint32_t NextNum = VN.getNextUnusedValueNumber(); - unsigned Num = VN.lookup_or_add(I); + // If we are propagating an equality like "(A == B)" == "true" then also + // propagate the equality A == B. + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) { + // Only equality comparisons are supported. 
+ if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || + (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) { + Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); + Changed |= propagateEquality(Op0, Op1, Root); + } + return Changed; + } + return Changed; +} + +/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return +/// true if every path from the entry block to 'Dst' passes via this edge. In +/// particular 'Dst' must not be reachable via another edge from 'Src'. +static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst, + DominatorTree *DT) { + // First off, there must not be more than one edge from Src to Dst, there + // should be exactly one. So keep track of the number of times Src occurs + // as a predecessor of Dst and fail if it's more than once. Secondly, any + // other predecessors of Dst should be dominated by Dst (see logic below). + bool SawEdgeFromSrc = false; + for (pred_iterator PI = pred_begin(Dst), PE = pred_end(Dst); PI != PE; ++PI) { + BasicBlock *Pred = *PI; + if (Pred == Src) { + // An edge from Src to Dst. + if (SawEdgeFromSrc) + // There are multiple edges from Src to Dst - fail. + return false; + SawEdgeFromSrc = true; + continue; + } + // If the predecessor is not dominated by Dst, then it must be possible to + // reach it either without passing through Src (and thus not via the edge) + // or by passing through Src but taking a different edge out of Src. Either + // way it is possible to reach Dst without passing via the edge, so fail. + if (!DT->dominates(Dst, *PI)) + return false; + } + assert(SawEdgeFromSrc && "No edge between these basic blocks!"); + + // Every path from the entry block to Dst must at some point pass to Dst from + // a predecessor that is not dominated by Dst. This predecessor can only be + // Src, since all others are dominated by Dst. As there is only one edge from + // Src to Dst, the path passes by this edge. 
+ return true; +} + +/// processInstruction - When calculating availability, handle an instruction +/// by inserting it into the appropriate sets +bool GVN::processInstruction(Instruction *I) { + // Ignore dbg info intrinsics. + if (isa<DbgInfoIntrinsic>(I)) + return false; + + // If the instruction can be easily simplified then do so now in preference + // to value numbering it. Value numbering often exposes redundancies, for + // example if it determines that %y is equal to %x then the instruction + // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. + if (Value *V = SimplifyInstruction(I, TD, DT)) { + I->replaceAllUsesWith(V); + if (MD && V->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + markInstructionForDeletion(I); + ++NumGVNSimpl; + return true; + } + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (processLoad(LI)) + return true; + + unsigned Num = VN.lookup_or_add(LI); + addToLeaderTable(Num, LI, LI->getParent()); + return false; + } + + // For conditional branches, we can perform simple conditional propagation on + // the condition value itself. 
if (BranchInst *BI = dyn_cast<BranchInst>(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); - if (!BI->isConditional() || isa<Constant>(BI->getCondition())) return false; Value *BranchCond = BI->getCondition(); - uint32_t CondVN = VN.lookup_or_add(BranchCond); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); + BasicBlock *Parent = BI->getParent(); + bool Changed = false; - if (TrueSucc->getSinglePredecessor()) - localAvail[TrueSucc]->table[CondVN] = - ConstantInt::getTrue(TrueSucc->getContext()); - if (FalseSucc->getSinglePredecessor()) - localAvail[FalseSucc]->table[CondVN] = - ConstantInt::getFalse(TrueSucc->getContext()); + if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT)) + Changed |= propagateEquality(BranchCond, + ConstantInt::getTrue(TrueSucc->getContext()), + TrueSucc); - return false; + if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT)) + Changed |= propagateEquality(BranchCond, + ConstantInt::getFalse(FalseSucc->getContext()), + FalseSucc); + + return Changed; + } + + // For switches, propagate the case values into the case destinations. + if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { + Value *SwitchCond = SI->getCondition(); + BasicBlock *Parent = SI->getParent(); + bool Changed = false; + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { + BasicBlock *Dst = SI->getSuccessor(i); + if (isOnlyReachableViaThisEdge(Parent, Dst, DT)) + Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst); + } + return Changed; + } + + // Instructions with void type don't return a value, so there's + // no point in trying to find redundancies in them. + if (I->getType()->isVoidTy()) return false; + + uint32_t NextNum = VN.getNextUnusedValueNumber(); + unsigned Num = VN.lookup_or_add(I); // Allocations are always uniquely numbered, so we can save time and memory // by fast failing them.
- } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) { + addToLeaderTable(Num, I, I->getParent()); return false; } - // Collapse PHI nodes - if (PHINode* p = dyn_cast<PHINode>(I)) { - Value *constVal = CollapsePhi(p); - - if (constVal) { - p->replaceAllUsesWith(constVal); - if (MD && constVal->getType()->isPointerTy()) - MD->invalidateCachedPointerInfo(constVal); - VN.erase(p); - - toErase.push_back(p); - } else { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); - } - // If the number we were assigned was a brand new VN, then we don't // need to do a lookup to see if the number already exists // somewhere in the domtree: it can't! - } else if (Num == NextNum) { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); - + if (Num == NextNum) { + addToLeaderTable(Num, I, I->getParent()); + return false; + } + // Perform fast-path value-number based elimination of values inherited from // dominators. - } else if (Value *repl = lookupNumber(I->getParent(), Num)) { - // Remove it! - VN.erase(I); - I->replaceAllUsesWith(repl); - if (MD && repl->getType()->isPointerTy()) - MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); - return true; - - } else { - localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + Value *repl = findLeader(I->getParent(), Num); + if (repl == 0) { + // Failure, just remember this instance for future use. + addToLeaderTable(Num, I, I->getParent()); + return false; } - - return false; + + // Remove it! + I->replaceAllUsesWith(repl); + if (MD && repl->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(repl); + markInstructionForDeletion(I); + return true; } /// runOnFunction - This is the main transformation entry point for a function. 
@@ -1999,6 +2128,7 @@ if (!NoLoads) MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); VN.setDomTree(DT); @@ -2009,8 +2139,8 @@ // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock *BB = FI; - ++FI; + BasicBlock *BB = FI++; + bool removedBlock = MergeBlockIntoPredecessor(BB, this); if (removedBlock) ++NumGVNBlocks; @@ -2018,7 +2148,6 @@ } unsigned Iteration = 0; - while (ShouldContinue) { DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); ShouldContinue = iterateOnFunction(F); @@ -2047,35 +2176,36 @@ bool GVN::processBlock(BasicBlock *BB) { - // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and - // incrementing BI before processing an instruction). - SmallVector<Instruction*, 8> toErase; + // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function + // (and incrementing BI before processing an instruction). + assert(InstrsToErase.empty() && + "We expect InstrsToErase to be empty across iterations"); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - ChangedFunction |= processInstruction(BI, toErase); - if (toErase.empty()) { + ChangedFunction |= processInstruction(BI); + if (InstrsToErase.empty()) { ++BI; continue; } // If we need some instructions deleted, do it now. - NumGVNInstr += toErase.size(); + NumGVNInstr += InstrsToErase.size(); // Avoid iterator invalidation. 
bool AtStart = BI == BB->begin(); if (!AtStart) --BI; - for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), - E = toErase.end(); I != E; ++I) { + for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(), + E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); } - toErase.clear(); + InstrsToErase.clear(); if (AtStart) BI = BB->begin(); @@ -2098,6 +2228,9 @@ // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; + // Don't perform PRE on a landing pad. + if (CurrentBlock->isLandingPad()) continue; + for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE; ) { Instruction *CurInst = BI++; @@ -2106,7 +2239,7 @@ isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() || CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || - ISA_DEBUG_INFO_INTRINSIC(CurInst)) + isa<DbgInfoIntrinsic>(CurInst)) continue; // We don't currently value number ANY inline asm calls. 
@@ -2136,20 +2269,19 @@ if (P == CurrentBlock) { NumWithout = 2; break; - } else if (!localAvail.count(P)) { + } else if (!DT->dominates(&F.getEntryBlock(), P)) { NumWithout = 2; break; } - DenseMap<uint32_t, Value*>::iterator predV = - localAvail[P]->table.find(ValNo); - if (predV == localAvail[P]->table.end()) { + Value* predV = findLeader(P, ValNo); + if (predV == 0) { PREPred = P; ++NumWithout; - } else if (predV->second == CurInst) { + } else if (predV == CurInst) { NumWithout = 2; } else { - predMap[P] = predV->second; + predMap[P] = predV; ++NumWith; } } @@ -2184,7 +2316,7 @@ if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) continue; - if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) { + if (Value *V = findLeader(PREPred, VN.lookup(Op))) { PREInstr->setOperand(i, V); } else { success = false; @@ -2203,29 +2335,43 @@ PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); + PREInstr->setDebugLoc(CurInst->getDebugLoc()); predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); ++NumGVNPRE; // Update the availability map to include the new instruction. - localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr)); + addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. 
- PHINode* Phi = PHINode::Create(CurInst->getType(), + pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); + PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), + CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; Phi->addIncoming(predMap[P], P); } VN.add(Phi, ValNo); - localAvail[CurrentBlock]->table[ValNo] = Phi; - + addToLeaderTable(ValNo, Phi, CurrentBlock); + Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); - if (MD && Phi->getType()->isPointerTy()) - MD->invalidateCachedPointerInfo(Phi); + if (Phi->getType()->isPointerTy()) { + // Because we have added a PHI-use of the pointer value, it has now + // "escaped" from alias analysis' perspective. We need to inform + // AA of this. + for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; + ++ii) { + unsigned jj = PHINode::getOperandNumForIncomingValue(ii); + VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); + } + + if (MD) + MD->invalidateCachedPointerInfo(Phi); + } VN.erase(CurInst); + removeFromLeaderTable(ValNo, CurInst, CurrentBlock); DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); @@ -2257,16 +2403,7 @@ /// iterateOnFunction - Executes one iteration of GVN bool GVN::iterateOnFunction(Function &F) { cleanupGlobalSets(); - - for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), - DE = df_end(DT->getRootNode()); DI != DE; ++DI) { - if (DI->getIDom()) - localAvail[DI->getBlock()] = - new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]); - else - localAvail[DI->getBlock()] = new ValueNumberScope(0); - } - + // Top-down walk of the dominator tree bool Changed = false; #if 0 @@ -2286,11 +2423,8 @@ void GVN::cleanupGlobalSets() { VN.clear(); - - for (DenseMap<BasicBlock*, 
ValueNumberScope*>::iterator - I = localAvail.begin(), E = localAvail.end(); I != E; ++I) - delete I->second; - localAvail.clear(); + LeaderTable.clear(); + TableAllocator.Reset(); } /// verifyRemoved - Verify that the specified instruction does not occur in our @@ -2300,17 +2434,14 @@ // Walk through the value number scope to make sure the instruction isn't // ferreted away in it. - for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator - I = localAvail.begin(), E = localAvail.end(); I != E; ++I) { - const ValueNumberScope *VNS = I->second; - - while (VNS) { - for (DenseMap<uint32_t, Value*>::const_iterator - II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) { - assert(II->second != Inst && "Inst still in value numbering scope!"); - } - - VNS = VNS->parent; + for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator + I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { + const LeaderTableEntry *Node = &I->second; + assert(Node->Val != Inst && "Inst still in value numbering scope!"); + + while (Node->Next) { + Node = Node->Next; + assert(Node->Val != Inst && "Inst still in value numbering scope!"); } } }
diff --git a/src/LLVM/lib/Transforms/Scalar/INSTALL.vcxproj b/src/LLVM/lib/Transforms/Scalar/INSTALL.vcxproj new file mode 100644 index 0000000..20e23b8 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/INSTALL.vcxproj
@@ -0,0 +1,261 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>INSTALL</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + 
<OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Scalar/INSTALL.vcxproj.filters b/src/LLVM/lib/Transforms/Scalar/INSTALL.vcxproj.filters new file mode 100644 index 0000000..251dd1d --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Scalar/IndVarSimplify.cpp b/src/LLVM/lib/Transforms/Scalar/IndVarSimplify.cpp new file mode 100644 index 0000000..75fa011 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -0,0 +1,1941 @@ +//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation analyzes and transforms the induction variables (and +// computations derived from them) into simpler forms suitable for subsequent +// analysis and transformation. +// +// If the trip count of a loop is computable, this pass also makes the following +// changes: +// 1. The exit condition for the loop is canonicalized to compare the +// induction value against the exit value. This turns loops like: +// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' +// 2. Any use outside of the loop of an expression derived from the indvar +// is changed to compute the derived value outside of the loop, eliminating +// the dependence on the exit value of the induction variable. If the only +// purpose of the loop is to compute the exit value of some derived +// expression, this transformation will make the loop dead. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "indvars" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Type.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumRemoved , "Number of aux indvars removed"); +STATISTIC(NumWidened , "Number of indvars widened"); +STATISTIC(NumInserted , "Number of canonical indvars added"); +STATISTIC(NumReplaced , "Number of exit values replaced"); +STATISTIC(NumLFTR , "Number of loop exit tests replaced"); +STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); +STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); + +namespace llvm { + cl::opt<bool> EnableIVRewrite( + "enable-iv-rewrite", cl::Hidden, + cl::desc("Enable canonical induction variable rewriting")); + + // Trip count verification can be enabled by default under NDEBUG if we + // implement a strong expression equivalence checker in SCEV. Until then, we + // use the verify-indvars flag, which may assert in some cases. 
+ cl::opt<bool> VerifyIndvars( + "verify-indvars", cl::Hidden, + cl::desc("Verify the ScalarEvolution result after running indvars")); +} + +namespace { + class IndVarSimplify : public LoopPass { + IVUsers *IU; + LoopInfo *LI; + ScalarEvolution *SE; + DominatorTree *DT; + TargetData *TD; + + SmallVector<WeakVH, 16> DeadInsts; + bool Changed; + public: + + static char ID; // Pass identification, replacement for typeid + IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0), + Changed(false) { + initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + if (EnableIVRewrite) + AU.addRequired<IVUsers>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreservedID(LCSSAID); + if (EnableIVRewrite) + AU.addPreserved<IVUsers>(); + AU.setPreservesCFG(); + } + + private: + virtual void releaseMemory() { + DeadInsts.clear(); + } + + bool isValidRewrite(Value *FromVal, Value *ToVal); + + void HandleFloatingPointIV(Loop *L, PHINode *PH); + void RewriteNonIntegerIVs(Loop *L); + + void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); + + void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); + + void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); + + Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, + PHINode *IndVar, SCEVExpander &Rewriter); + + void SinkUnusedInvariants(Loop *L); + }; +} + +char IndVarSimplify::ID = 0; +INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", + "Induction Variable Simplification", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(IVUsers) +INITIALIZE_PASS_END(IndVarSimplify, "indvars", + "Induction Variable Simplification", false, false) + +Pass *llvm::createIndVarSimplifyPass() { + return new IndVarSimplify(); +} + +/// isValidRewrite - Return true if the SCEV expansion generated by the +/// rewriter can replace the original value. SCEV guarantees that it +/// produces the same value, but the way it is produced may be illegal IR. +/// Ideally, this function will only be called for verification. +bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { + // If an SCEV expression subsumed multiple pointers, its expansion could + // reassociate the GEP changing the base pointer. This is illegal because the + // final address produced by a GEP chain must be inbounds relative to its + // underlying object. Otherwise basic alias analysis, among other things, + // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid + // producing an expression involving multiple pointers. Until then, we must + // bail out here. + // + // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject + // because it understands lcssa phis while SCEV does not. + Value *FromPtr = FromVal; + Value *ToPtr = ToVal; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) { + FromPtr = GEP->getPointerOperand(); + } + if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) { + ToPtr = GEP->getPointerOperand(); + } + if (FromPtr != FromVal || ToPtr != ToVal) { + // Quickly check the common case + if (FromPtr == ToPtr) + return true; + + // SCEV may have rewritten an expression that produces the GEP's pointer + // operand. That's ok as long as the pointer operand has the same base + // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the + // base of a recurrence. 
This handles the case in which SCEV expansion + // converts a pointer type recurrence into a nonrecurrent pointer base + // indexed by an integer recurrence. + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); + const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); + if (FromBase == ToBase) + return true; + + DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " + << *FromBase << " != " << *ToBase << "\n"); + + return false; + } + return true; +} + +/// Determine the insertion point for this user. By default, insert immediately +/// before the user. SCEVExpander or LICM will hoist loop invariants out of the +/// loop. For PHI nodes, there may be multiple uses, so compute the nearest +/// common dominator for the incoming blocks. +static Instruction *getInsertPointForUses(Instruction *User, Value *Def, + DominatorTree *DT) { + PHINode *PHI = dyn_cast<PHINode>(User); + if (!PHI) + return User; + + Instruction *InsertPt = 0; + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + if (PHI->getIncomingValue(i) != Def) + continue; + + BasicBlock *InsertBB = PHI->getIncomingBlock(i); + if (!InsertPt) { + InsertPt = InsertBB->getTerminator(); + continue; + } + InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); + InsertPt = InsertBB->getTerminator(); + } + assert(InsertPt && "Missing phi operand"); + assert((!isa<Instruction>(Def) || + DT->dominates(cast<Instruction>(Def), InsertPt)) && + "def does not dominate all uses"); + return InsertPt; +} + +//===----------------------------------------------------------------------===// +// RewriteNonIntegerIVs and helpers. Prefer integer IVs. +//===----------------------------------------------------------------------===// + +/// ConvertToSInt - Convert APF to an integer, if possible. 
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { + bool isExact = false; + if (&APF.getSemantics() == &APFloat::PPCDoubleDouble) + return false; + // See if we can convert this to an int64_t + uint64_t UIntVal; + if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero, + &isExact) != APFloat::opOK || !isExact) + return false; + IntVal = UIntVal; + return true; +} + +/// HandleFloatingPointIV - If the loop has floating induction variable +/// then insert corresponding integer induction variable if possible. +/// For example, +/// for(double i = 0; i < 10000; ++i) +/// bar(i) +/// is converted into +/// for(int i = 0; i < 10000; ++i) +/// bar((double)i); +/// +void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { + unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); + unsigned BackEdge = IncomingEdge^1; + + // Check incoming value. + ConstantFP *InitValueVal = + dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); + + int64_t InitValue; + if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) + return; + + // Check IV increment. Reject this PN if increment operation is not + // an add or increment value can not be represented by an integer. + BinaryOperator *Incr = + dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); + if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; + + // If this is not an add of the PHI with a constantfp, or if the constant fp + // is not an integer, bail out. + ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); + int64_t IncValue; + if (IncValueVal == 0 || Incr->getOperand(0) != PN || + !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) + return; + + // Check Incr uses. One user is PN and the other user is an exit condition + // used by the conditional terminator. 
+ Value::use_iterator IncrUse = Incr->use_begin(); + Instruction *U1 = cast<Instruction>(*IncrUse++); + if (IncrUse == Incr->use_end()) return; + Instruction *U2 = cast<Instruction>(*IncrUse++); + if (IncrUse != Incr->use_end()) return; + + // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't + // only used by a branch, we can't transform it. + FCmpInst *Compare = dyn_cast<FCmpInst>(U1); + if (!Compare) + Compare = dyn_cast<FCmpInst>(U2); + if (Compare == 0 || !Compare->hasOneUse() || + !isa<BranchInst>(Compare->use_back())) + return; + + BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); + + // We need to verify that the branch actually controls the iteration count + // of the loop. If not, the new IV can overflow and no one will notice. + // The branch block must be in the loop and one of the successors must be out + // of the loop. + assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); + if (!L->contains(TheBr->getParent()) || + (L->contains(TheBr->getSuccessor(0)) && + L->contains(TheBr->getSuccessor(1)))) + return; + + + // If it isn't a comparison with an integer-as-fp (the exit value), we can't + // transform it. + ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); + int64_t ExitValue; + if (ExitValueVal == 0 || + !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) + return; + + // Find new predicate for integer comparison. + CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; + switch (Compare->getPredicate()) { + default: return; // Unknown comparison. 
+ case CmpInst::FCMP_OEQ: + case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; + } + + // We convert the floating point induction variable to a signed i32 value if + // we can. This is only safe if the comparison will not overflow in a way + // that won't be trapped by the integer equivalent operations. Check for this + // now. + // TODO: We could use i64 if it is native and the range requires it. + + // The start/stride/exit values must all fit in signed i32. + if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) + return; + + // If not actually striding (add x, 0.0), avoid touching the code. + if (IncValue == 0) + return; + + // Positive and negative strides have different safety conditions. + if (IncValue > 0) { + // If we have a positive stride, we require the init to be less than the + // exit value. + if (InitValue >= ExitValue) + return; + + uint32_t Range = uint32_t(ExitValue-InitValue); + // Check for infinite loop, either: + // while (i <= Exit) or until (i > Exit) + if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { + if (++Range == 0) return; // Range overflows. + } + + unsigned Leftover = Range % uint32_t(IncValue); + + // If this is an equality comparison, we require that the strided value + // exactly land on the exit value, otherwise the IV condition will wrap + // around and do things the fp IV wouldn't. 
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && + Leftover != 0) + return; + + // If the stride would wrap around the i32 before exiting, we can't + // transform the IV. + if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) + return; + + } else { + // If we have a negative stride, we require the init to be greater than the + // exit value. + if (InitValue <= ExitValue) + return; + + uint32_t Range = uint32_t(InitValue-ExitValue); + // Check for infinite loop, either: + // while (i >= Exit) or until (i < Exit) + if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { + if (++Range == 0) return; // Range overflows. + } + + unsigned Leftover = Range % uint32_t(-IncValue); + + // If this is an equality comparison, we require that the strided value + // exactly land on the exit value, otherwise the IV condition will wrap + // around and do things the fp IV wouldn't. + if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && + Leftover != 0) + return; + + // If the stride would wrap around the i32 before exiting, we can't + // transform the IV. + if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) + return; + } + + IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); + + // Insert new integer induction variable. + PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); + NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), + PN->getIncomingBlock(IncomingEdge)); + + Value *NewAdd = + BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), + Incr->getName()+".int", Incr); + NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); + + ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, + ConstantInt::get(Int32Ty, ExitValue), + Compare->getName()); + + // In the following deletions, PN may become dead and may be deleted. + // Use a WeakVH to observe whether this happens. + WeakVH WeakPH = PN; + + // Delete the old floating point exit comparison. 
The branch starts using the + // new comparison. + NewCompare->takeName(Compare); + Compare->replaceAllUsesWith(NewCompare); + RecursivelyDeleteTriviallyDeadInstructions(Compare); + + // Delete the old floating point increment. + Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); + RecursivelyDeleteTriviallyDeadInstructions(Incr); + + // If the FP induction variable still has uses, this is because something else + // in the loop uses its value. In order to canonicalize the induction + // variable, we chose to eliminate the IV and rewrite it in terms of an + // int->fp cast. + // + // We give preference to sitofp over uitofp because it is faster on most + // platforms. + if (WeakPH) { + Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", + PN->getParent()->getFirstInsertionPt()); + PN->replaceAllUsesWith(Conv); + RecursivelyDeleteTriviallyDeadInstructions(PN); + } + + // Add a new IVUsers entry for the newly-created integer PHI. + if (IU) + IU->AddUsersIfInteresting(NewPHI); + + Changed = true; +} + +void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { + // First step. Check to see if there are any floating-point recurrences. + // If there are, change them into integer recurrences, permitting analysis by + // the SCEV routines. + // + BasicBlock *Header = L->getHeader(); + + SmallVector<WeakVH, 8> PHIs; + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHIs.push_back(PN); + + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) + HandleFloatingPointIV(L, PN); + + // If the loop previously had floating-point IV, ScalarEvolution + // may not have been able to compute a trip count. Now that we've done some + // re-writing, the trip count may be computable. + if (Changed) + SE->forgetLoop(L); +} + +//===----------------------------------------------------------------------===// +// RewriteLoopExitValues - Optimize IV users outside the loop. 
+// As a side effect, reduces the amount of IV processing within the loop. +//===----------------------------------------------------------------------===// + +/// RewriteLoopExitValues - Check to see if this loop has a computable +/// loop-invariant execution count. If so, this means that we can compute the +/// final value of any expressions that are recurrent in the loop, and +/// substitute the exit values from the loop into any instructions outside of +/// the loop that use the final values of the current expressions. +/// +/// This is mostly redundant with the regular IndVarSimplify activities that +/// happen later, except that it's more powerful in some cases, because it's +/// able to brute-force evaluate arbitrary instructions as long as they have +/// constant operands at the beginning of the loop. +void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { + // Verify the input to the pass in already in LCSSA form. + assert(L->isLCSSAForm(*DT)); + + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + + // Find all values that are computed inside the loop, but used outside of it. + // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan + // the exit blocks of the loop to find them. + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBB = ExitBlocks[i]; + + // If there are no PHI nodes in this exit block, then no values defined + // inside the loop are used on this path, skip it. + PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); + if (!PN) continue; + + unsigned NumPreds = PN->getNumIncomingValues(); + + // Iterate over all of the PHI nodes. + BasicBlock::iterator BBI = ExitBB->begin(); + while ((PN = dyn_cast<PHINode>(BBI++))) { + if (PN->use_empty()) + continue; // dead use, don't replace it + + // SCEV only supports integer expressions for now. 
+ if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy()) + continue; + + // It's necessary to tell ScalarEvolution about this explicitly so that + // it can walk the def-use list and forget all SCEVs, as it may not be + // watching the PHI itself. Once the new exit value is in place, there + // may not be a def-use connection between the loop and every instruction + // which got a SCEVAddRecExpr for that loop. + SE->forgetValue(PN); + + // Iterate over all of the values in all the PHI nodes. + for (unsigned i = 0; i != NumPreds; ++i) { + // If the value being merged in is not integer or is not defined + // in the loop, skip it. + Value *InVal = PN->getIncomingValue(i); + if (!isa<Instruction>(InVal)) + continue; + + // If this pred is for a subloop, not L itself, skip it. + if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) + continue; // The Block is in a subloop, skip it. + + // Check that InVal is defined in the loop. + Instruction *Inst = cast<Instruction>(InVal); + if (!L->contains(Inst)) + continue; + + // Okay, this instruction has a user outside of the current loop + // and varies predictably *inside* the loop. Evaluate the value it + // contains when the loop exits, if possible. + const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + if (!SE->isLoopInvariant(ExitValue, L)) + continue; + + Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); + + DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' + << " LoopVal = " << *Inst << "\n"); + + if (!isValidRewrite(Inst, ExitVal)) { + DeadInsts.push_back(ExitVal); + continue; + } + Changed = true; + ++NumReplaced; + + PN->setIncomingValue(i, ExitVal); + + // If this instruction is dead now, delete it. + RecursivelyDeleteTriviallyDeadInstructions(Inst); + + if (NumPreds == 1) { + // Completely replace a single-pred PHI. This is safe, because the + // NewVal won't be variant in the loop, so we don't need an LCSSA phi + // node anymore. 
+ PN->replaceAllUsesWith(ExitVal); + RecursivelyDeleteTriviallyDeadInstructions(PN); + } + } + if (NumPreds != 1) { + // Clone the PHI and delete the original one. This lets IVUsers and + // any other maps purge the original user from their records. + PHINode *NewPN = cast<PHINode>(PN->clone()); + NewPN->takeName(PN); + NewPN->insertBefore(PN); + PN->replaceAllUsesWith(NewPN); + PN->eraseFromParent(); + } + } + } + + // The insertion point instruction may have been deleted; clear it out + // so that the rewriter doesn't trip over it later. + Rewriter.clearInsertPoint(); +} + +//===----------------------------------------------------------------------===// +// Rewrite IV users based on a canonical IV. +// Only for use with -enable-iv-rewrite. +//===----------------------------------------------------------------------===// + +/// FIXME: It is an extremely bad idea to indvar substitute anything more +/// complex than affine induction variables. Doing so will put expensive +/// polynomial evaluations inside of the loop, and the str reduction pass +/// currently can only reduce affine polynomials. For now just disable +/// indvar subst on anything more complex than an affine addrec, unless +/// it can be expanded to a trivial value. +static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { + // Loop-invariant values are safe. + if (SE->isLoopInvariant(S, L)) return true; + + // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how + // to transform them into efficient code. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + return AR->isAffine(); + + // An add is safe it all its operands are safe. + if (const SCEVCommutativeExpr *Commutative + = dyn_cast<SCEVCommutativeExpr>(S)) { + for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), + E = Commutative->op_end(); I != E; ++I) + if (!isSafe(*I, L, SE)) return false; + return true; + } + + // A cast is safe if its operand is. 
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return isSafe(C->getOperand(), L, SE); + + // A udiv is safe if its operands are. + if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) + return isSafe(UD->getLHS(), L, SE) && + isSafe(UD->getRHS(), L, SE); + + // SCEVUnknown is always safe. + if (isa<SCEVUnknown>(S)) + return true; + + // Nothing else is safe. + return false; +} + +void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { + // Rewrite all induction variable expressions in terms of the canonical + // induction variable. + // + // If there were induction variables of other sizes or offsets, manually + // add the offsets to the primary induction variable and cast, avoiding + // the need for the code evaluation methods to insert induction variables + // of different sizes. + for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { + Value *Op = UI->getOperandValToReplace(); + Type *UseTy = Op->getType(); + Instruction *User = UI->getUser(); + + // Compute the final addrec to expand into code. + const SCEV *AR = IU->getReplacementExpr(*UI); + + // Evaluate the expression out of the loop, if possible. + if (!L->contains(UI->getUser())) { + const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); + if (SE->isLoopInvariant(ExitVal, L)) + AR = ExitVal; + } + + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!isSafe(AR, L, SE)) + continue; + + // Determine the insertion point for this user. By default, insert + // immediately before the user. The SCEVExpander class will automatically + // hoist loop invariants out of the loop. 
For PHI nodes, there may be + // multiple uses, so compute the nearest common dominator for the + // incoming blocks. + Instruction *InsertPt = getInsertPointForUses(User, Op, DT); + + // Now expand it into actual Instructions and patch it into place. + Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + + if (!isValidRewrite(Op, NewVal)) { + DeadInsts.push_back(NewVal); + continue; + } + // Inform ScalarEvolution that this value is changing. The change doesn't + // affect its value, but it does potentially affect which use lists the + // value will be on after the replacement, which affects ScalarEvolution's + // ability to walk use lists and drop dangling pointers when a value is + // deleted. + SE->forgetValue(User); + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + if (Instruction *NewValI = dyn_cast<Instruction>(NewVal)) + NewValI->setDebugLoc(User->getDebugLoc()); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); + } +} + +//===----------------------------------------------------------------------===// +// IV Widening - Extend the width of an IV to cover its widest uses. +//===----------------------------------------------------------------------===// + +namespace { + // Collect information about induction variables that are used by sign/zero + // extend operations. This information is recorded by CollectExtend and + // provides the input to WidenIV. + struct WideIVInfo { + PHINode *NarrowIV; + Type *WidestNativeType; // Widest integer type created [sz]ext + bool IsSigned; // Was an sext user seen before a zext? 
+ + WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {} + }; + + class WideIVVisitor : public IVVisitor { + ScalarEvolution *SE; + const TargetData *TD; + + public: + WideIVInfo WI; + + WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV, + const TargetData *TData) : + SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; } + + // Implement the interface used by simplifyUsersOfIV. + virtual void visitCast(CastInst *Cast); + }; +} + +/// visitCast - Update information about the induction variable that is +/// extended by this sign or zero extend operation. This is used to determine +/// the final width of the IV before actually widening it. +void WideIVVisitor::visitCast(CastInst *Cast) { + bool IsSigned = Cast->getOpcode() == Instruction::SExt; + if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) + return; + + Type *Ty = Cast->getType(); + uint64_t Width = SE->getTypeSizeInBits(Ty); + if (TD && !TD->isLegalInteger(Width)) + return; + + if (!WI.WidestNativeType) { + WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); + WI.IsSigned = IsSigned; + return; + } + + // We extend the IV to satisfy the sign of its first user, arbitrarily. + if (WI.IsSigned != IsSigned) + return; + + if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) + WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); +} + +namespace { + +/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the +/// WideIV that computes the same value as the Narrow IV def. This avoids +/// caching Use* pointers. +struct NarrowIVDefUse { + Instruction *NarrowDef; + Instruction *NarrowUse; + Instruction *WideDef; + + NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {} + + NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD): + NarrowDef(ND), NarrowUse(NU), WideDef(WD) {} +}; + +/// WidenIV - The goal of this transform is to remove sign and zero extends +/// without creating any new induction variables. 
To do this, it creates a new +/// phi of the wider type and redirects all users, either removing extends or +/// inserting truncs whenever we stop propagating the type. +/// +class WidenIV { + // Parameters + PHINode *OrigPhi; + Type *WideType; + bool IsSigned; + + // Context + LoopInfo *LI; + Loop *L; + ScalarEvolution *SE; + DominatorTree *DT; + + // Result + PHINode *WidePhi; + Instruction *WideInc; + const SCEV *WideIncExpr; + SmallVectorImpl<WeakVH> &DeadInsts; + + SmallPtrSet<Instruction*,16> Widened; + SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; + +public: + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, + ScalarEvolution *SEv, DominatorTree *DTree, + SmallVectorImpl<WeakVH> &DI) : + OrigPhi(WI.NarrowIV), + WideType(WI.WidestNativeType), + IsSigned(WI.IsSigned), + LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), + SE(SEv), + DT(DTree), + WidePhi(0), + WideInc(0), + WideIncExpr(0), + DeadInsts(DI) { + assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); + } + + PHINode *CreateWideIV(SCEVExpander &Rewriter); + +protected: + Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use); + + Instruction *CloneIVUser(NarrowIVDefUse DU); + + const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); + + const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU); + + Instruction *WidenIVUse(NarrowIVDefUse DU); + + void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); +}; +} // anonymous namespace + +/// isLoopInvariant - Perform a quick domtree based check for loop invariance +/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems +/// gratuitous for this purpose. 
+static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) { + Instruction *Inst = dyn_cast<Instruction>(V); + if (!Inst) + return true; + + return DT->properlyDominates(Inst->getParent(), L->getHeader()); +} + +Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use) { + // Set the debug location and conservative insertion point. + IRBuilder<> Builder(Use); + // Hoist the insertion point into loop preheaders as far as possible. + for (const Loop *L = LI->getLoopFor(Use->getParent()); + L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT); + L = L->getParentLoop()) + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + + return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : + Builder.CreateZExt(NarrowOper, WideType); +} + +/// CloneIVUser - Instantiate a wide operation to replace a narrow +/// operation. This only needs to handle operations that can evaluation to +/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone. +Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { + unsigned Opcode = DU.NarrowUse->getOpcode(); + switch (Opcode) { + default: + return 0; + case Instruction::Add: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n"); + + // Replace NarrowDef operands with WideDef. Otherwise, we don't know + // anything about the narrow operand yet so must insert a [sz]ext. It is + // probably loop invariant and will be folded or hoisted. If it actually + // comes from a widened IV, it should be removed during a future call to + // WidenIVUse. + Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? 
DU.WideDef : + getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse); + Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef : + getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse); + + BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse); + BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), + LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(DU.NarrowUse); + Builder.Insert(WideBO); + if (const OverflowingBinaryOperator *OBO = + dyn_cast<OverflowingBinaryOperator>(NarrowBO)) { + if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); + if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); + } + return WideBO; + } + llvm_unreachable(0); +} + +/// No-wrap operations can transfer sign extension of their result to their +/// operands. Generate the SCEV value for the widened operation without +/// actually modifying the IR yet. If the expression after extending the +/// operands is an AddRec for this loop, return it. +const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { + // Handle the common case of add<nsw/nuw> + if (DU.NarrowUse->getOpcode() != Instruction::Add) + return 0; + + // One operand (NarrowDef) has already been extended to WideDef. Now determine + // if extending the other will lead to a recurrence. + unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 
1 : 0; + assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); + + const SCEV *ExtendOperExpr = 0; + const OverflowingBinaryOperator *OBO = + cast<OverflowingBinaryOperator>(DU.NarrowUse); + if (IsSigned && OBO->hasNoSignedWrap()) + ExtendOperExpr = SE->getSignExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else if(!IsSigned && OBO->hasNoUnsignedWrap()) + ExtendOperExpr = SE->getZeroExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else + return 0; + + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>( + SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr, + IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW)); + + if (!AddRec || AddRec->getLoop() != L) + return 0; + return AddRec; +} + +/// GetWideRecurrence - Is this instruction potentially interesting from +/// IVUsers' perspective after widening its type? In other words, can the +/// extend be safely hoisted out of the loop with SCEV reducing the value to a +/// recurrence on the same loop. If so, return the sign or zero extended +/// recurrence. Otherwise return NULL. +const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { + if (!SE->isSCEVable(NarrowUse->getType())) + return 0; + + const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); + if (SE->getTypeSizeInBits(NarrowExpr->getType()) + >= SE->getTypeSizeInBits(WideType)) { + // NarrowUse implicitly widens its operand. e.g. a gep with a narrow + // index. So don't follow this use. + return 0; + } + + const SCEV *WideExpr = IsSigned ? + SE->getSignExtendExpr(NarrowExpr, WideType) : + SE->getZeroExtendExpr(NarrowExpr, WideType); + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); + if (!AddRec || AddRec->getLoop() != L) + return 0; + return AddRec; +} + +/// WidenIVUse - Determine whether an individual user of the narrow IV can be +/// widened. If so, return the wide clone of the user.
+Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) { + + // Stop traversing the def-use chain at inner-loop phis or post-loop phis. + if (isa<PHINode>(DU.NarrowUse) && + LI->getLoopFor(DU.NarrowUse->getParent()) != L) + return 0; + + // Our raison d'etre! Eliminate sign and zero extension. + if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) { + Value *NewDef = DU.WideDef; + if (DU.NarrowUse->getType() != WideType) { + unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + if (CastWidth < IVWidth) { + // The cast isn't as wide as the IV, so insert a Trunc. + IRBuilder<> Builder(DU.NarrowUse); + NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); + } + else { + // A wider extend was hidden behind a narrower one. This may induce + // another round of IV widening in which the intermediate IV becomes + // dead. It should be very rare. + DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi + << " not wide enough to subsume " << *DU.NarrowUse << "\n"); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + NewDef = DU.NarrowUse; + } + } + if (NewDef != DU.NarrowUse) { + DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse + << " replaced by " << *DU.WideDef << "\n"); + ++NumElimExt; + DU.NarrowUse->replaceAllUsesWith(NewDef); + DeadInsts.push_back(DU.NarrowUse); + } + // Now that the extend is gone, we want to expose its uses for potential + // further simplification. We don't need to directly inform SimplifyIVUsers + // of the new users, because their parent IV will be processed later as a + // new loop phi. If we preserved IVUsers analysis, we would also want to + // push the uses of WideDef here. + + // No further widening is needed. The deceased [sz]ext had done it for us. + return 0; + } + + // Does this user itself evaluate to a recurrence after widening?
+ const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse); + if (!WideAddRec) { + WideAddRec = GetExtendedOperandRecurrence(DU); + } + if (!WideAddRec) { + // This user does not evaluate to a recurrence after widening, so don't + // follow it. Instead insert a Trunc to kill off the original use, + // eventually isolating the original narrow IV so it can be removed. + IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); + return 0; + } + // Assume block terminators cannot evaluate to a recurrence. We can't + // insert a Trunc after a terminator if there happens to be a critical edge. + assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && + "SCEV is not expected to evaluate a block terminator"); + + // Reuse the IV increment that SCEVExpander created as long as it dominates + // NarrowUse. + Instruction *WideUse = 0; + if (WideAddRec == WideIncExpr + && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT)) + WideUse = WideInc; + else { + WideUse = CloneIVUser(DU); + if (!WideUse) + return 0; + } + // Evaluation of WideAddRec ensured that the narrow expression could be + // extended outside the loop without overflow. This suggests that the wide use + // evaluates to the same expression as the extended narrow use, but doesn't + // absolutely guarantee it. Hence the following failsafe check. In rare cases + // where it fails, we simply throw away the newly created wide use. + if (WideAddRec != SE->getSCEV(WideUse)) { + DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse + << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); + DeadInsts.push_back(WideUse); + return 0; + } + + // Returning WideUse pushes it on the worklist. + return WideUse; +} + +/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+/// +void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { + for (Value::use_iterator UI = NarrowDef->use_begin(), + UE = NarrowDef->use_end(); UI != UE; ++UI) { + Instruction *NarrowUse = cast<Instruction>(*UI); + + // Handle data flow merges and bizarre phi cycles. + if (!Widened.insert(NarrowUse)) + continue; + + NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef)); + } +} + +/// CreateWideIV - Process a single induction variable. First use the +/// SCEVExpander to create a wide induction variable that evaluates to the same +/// recurrence as the original narrow IV. Then use a worklist to forward +/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all +/// interesting IV users, the narrow IV will be isolated for removal by +/// DeleteDeadPHIs. +/// +/// It would be simpler to delete uses as they are processed, but we must avoid +/// invalidating SCEV expressions. +/// +PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { + // Is this phi an induction variable? + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); + if (!AddRec) + return NULL; + + // Widen the induction variable expression. + const SCEV *WideIVExpr = IsSigned ? + SE->getSignExtendExpr(AddRec, WideType) : + SE->getZeroExtendExpr(AddRec, WideType); + + assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && + "Expect the new IV expression to preserve its type"); + + // Can the IV be extended outside the loop without overflow? + AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); + if (!AddRec || AddRec->getLoop() != L) + return NULL; + + // An AddRec must have loop-invariant operands. Since this AddRec is + // materialized by a loop header phi, the expression cannot have any post-loop + // operands, so they must dominate the loop header. 
+ assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) && + SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) + && "Loop header phi recurrence inputs do not dominate the loop"); + + // The rewriter provides a value for the desired IV expression. This may + // either find an existing phi or materialize a new one. Either way, we + // expect a well-formed cyclic phi-with-increments. i.e. any operand not part + // of the phi-SCC dominates the loop entry. + Instruction *InsertPt = L->getHeader()->begin(); + WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); + + // Remembering the WideIV increment generated by SCEVExpander allows + // WidenIVUse to reuse it when widening the narrow IV's increment. We don't + // employ a general reuse mechanism because the call above is the only call to + // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + WideInc = + cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); + WideIncExpr = SE->getSCEV(WideInc); + } + + DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); + ++NumWidened; + + // Traverse the def-use chain using a worklist starting at the original IV. + assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); + + Widened.insert(OrigPhi); + pushNarrowIVUsers(OrigPhi, WidePhi); + + while (!NarrowIVUsers.empty()) { + NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); + + // Process a def-use edge. This may replace the use, so don't hold a + // use_iterator across it. + Instruction *WideUse = WidenIVUse(DU); + + // Follow all def-use edges from the previous narrow use. + if (WideUse) + pushNarrowIVUsers(DU.NarrowUse, WideUse); + + // WidenIVUse may have removed the def-use edge. 
+ if (DU.NarrowDef->use_empty()) + DeadInsts.push_back(DU.NarrowDef); + } + return WidePhi; +} + +//===----------------------------------------------------------------------===// +// Simplification of IV users based on SCEV evaluation. +//===----------------------------------------------------------------------===// + + +/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV +/// users. Each successive simplification may push more users which may +/// themselves be candidates for simplification. +/// +/// Sign/Zero extend elimination is interleaved with IV simplification. +/// +void IndVarSimplify::SimplifyAndExtend(Loop *L, + SCEVExpander &Rewriter, + LPPassManager &LPM) { + SmallVector<WideIVInfo, 8> WideIVs; + + SmallVector<PHINode*, 8> LoopPhis; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + LoopPhis.push_back(cast<PHINode>(I)); + } + // Each round of simplification iterates through the SimplifyIVUsers worklist + // for all current phis, then determines whether any IVs can be + // widened. Widening adds new phis to LoopPhis, inducing another round of + // simplification on the wide IVs. + while (!LoopPhis.empty()) { + // Evaluate as many IV expressions as possible before widening any IVs. This + // forces SCEV to set no-wrap flags before evaluating sign/zero + // extension. The first time SCEV attempts to normalize sign/zero extension, + // the result becomes final. So for the most predictable results, we delay + // evaluation of sign/zero extend evaluation until needed, and avoid running + // other SCEV based analysis prior to SimplifyAndExtend. + do { + PHINode *CurrIV = LoopPhis.pop_back_val(); + + // Information about sign/zero extensions of CurrIV. 
+ WideIVVisitor WIV(CurrIV, SE, TD); + + Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV); + + if (WIV.WI.WidestNativeType) { + WideIVs.push_back(WIV.WI); + } + } while(!LoopPhis.empty()); + + for (; !WideIVs.empty(); WideIVs.pop_back()) { + WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts); + if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) { + Changed = true; + LoopPhis.push_back(WidePhi); + } + } + } +} + +//===----------------------------------------------------------------------===// +// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. +//===----------------------------------------------------------------------===// + +/// Check for expressions that ScalarEvolution generates to compute +/// BackedgeTakenInfo. If these expressions have not been reduced, then +/// expanding them may incur additional cost (albeit in the loop preheader). +static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, + ScalarEvolution *SE) { + // If the backedge-taken count is a UDiv, it's very likely a UDiv that + // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a + // precise expression, rather than a UDiv from the user's code. If we can't + // find a UDiv in the code with some simple searching, assume the former and + // forego rewriting the loop. + if (isa<SCEVUDivExpr>(S)) { + ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!OrigCond) return true; + const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); + R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); + if (R != S) { + const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); + L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); + if (L != S) + return true; + } + } + + if (EnableIVRewrite) + return false; + + // Recurse past add expressions, which commonly occur in the + // BackedgeTakenCount. They may already exist in program code, and if not, + // they are not too expensive rematerialize. 
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + if (isHighCostExpansion(*I, BI, SE)) + return true; + } + return false; + } + + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + return true; + + // If we haven't recognized an expensive SCEV pattern, assume it's an expression + // produced by program code. + return false; +} + +/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken +/// count expression can be safely and cheaply expanded into an instruction +/// sequence that can be used by LinearFunctionTestReplace. +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || + BackedgeTakenCount->isZero()) + return false; + + if (!L->getExitingBlock()) + return false; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return false; + + if (isHighCostExpansion(BackedgeTakenCount, BI, SE)) + return false; + + return true; +} + +/// getBackedgeIVType - Get the widest type used by the loop test after peeking +/// through Truncs. +/// +/// TODO: Unnecessary when ForceLFTR is removed. +static Type *getBackedgeIVType(Loop *L) { + if (!L->getExitingBlock()) + return 0; + + // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return 0; + + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return 0; + + Type *Ty = 0; + for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); + OI != OE; ++OI) { + assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); + TruncInst *Trunc = dyn_cast<TruncInst>(*OI); + if (!Trunc) + continue; + + return Trunc->getSrcTy(); + } + return Ty; +} + +/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop +/// invariant value to the phi. +static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { + Instruction *IncI = dyn_cast<Instruction>(IncV); + if (!IncI) + return 0; + + switch (IncI->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + break; + case Instruction::GetElementPtr: + // An IV counter must preserve its type. + if (IncI->getNumOperands() == 2) + break; + default: + return 0; + } + + PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); + if (Phi && Phi->getParent() == L->getHeader()) { + if (isLoopInvariant(IncI->getOperand(1), L, DT)) + return Phi; + return 0; + } + if (IncI->getOpcode() == Instruction::GetElementPtr) + return 0; + + // Allow add/sub to be commuted. + Phi = dyn_cast<PHINode>(IncI->getOperand(1)); + if (Phi && Phi->getParent() == L->getHeader()) { + if (isLoopInvariant(IncI->getOperand(0), L, DT)) + return Phi; + } + return 0; +} + +/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show +/// that the current exit test is already sufficiently canonical. +static bool needsLFTR(Loop *L, DominatorTree *DT) { + assert(L->getExitingBlock() && "expected loop exit"); + + BasicBlock *LatchBlock = L->getLoopLatch(); + // Don't bother with LFTR if the loop is not properly simplified. 
+ if (!LatchBlock) + return false; + + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + assert(BI && "expected exit branch"); + + // Do LFTR to simplify the exit condition to an ICMP. + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return true; + + // Do LFTR to simplify the exit ICMP to EQ/NE + ICmpInst::Predicate Pred = Cond->getPredicate(); + if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) + return true; + + // Look for a loop invariant RHS + Value *LHS = Cond->getOperand(0); + Value *RHS = Cond->getOperand(1); + if (!isLoopInvariant(RHS, L, DT)) { + if (!isLoopInvariant(LHS, L, DT)) + return true; + std::swap(LHS, RHS); + } + // Look for a simple IV counter LHS + PHINode *Phi = dyn_cast<PHINode>(LHS); + if (!Phi) + Phi = getLoopPhiForCounter(LHS, L, DT); + + if (!Phi) + return true; + + // Do LFTR if the exit condition's IV is *not* a simple counter. + Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch()); + return Phi != getLoopPhiForCounter(IncV, L, DT); +} + +/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to +/// be rewritten) loop exit test. +static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { + int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); + Value *IncV = Phi->getIncomingValue(LatchIdx); + + for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end(); + UI != UE; ++UI) { + if (*UI != Cond && *UI != IncV) return false; + } + + for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end(); + UI != UE; ++UI) { + if (*UI != Cond && *UI != Phi) return false; + } + return true; +} + +/// FindLoopCounter - Find an affine IV in canonical form. +/// +/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount +/// +/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. +/// This is difficult in general for SCEV because of potential overflow. 
But we +/// could at least handle constant BECounts. +static PHINode * +FindLoopCounter(Loop *L, const SCEV *BECount, + ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) { + // I'm not sure how BECount could be a pointer type, but we definitely don't + // want to LFTR that. + if (BECount->getType()->isPointerTy()) + return 0; + + uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); + + Value *Cond = + cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition(); + + // Loop over all of the PHI nodes, looking for a simple counter. + PHINode *BestPhi = 0; + const SCEV *BestInit = 0; + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "needsLFTR should guarantee a loop latch"); + + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *Phi = cast<PHINode>(I); + if (!SE->isSCEVable(Phi->getType())) + continue; + + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); + if (!AR || AR->getLoop() != L || !AR->isAffine()) + continue; + + // AR may be a pointer type, while BECount is an integer type. + // AR may be wider than BECount. With eq/ne tests overflow is immaterial. + // AR may not be a narrower type, or we may never exit. + uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); + if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth))) + continue; + + const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); + if (!Step || !Step->isOne()) + continue; + + int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); + Value *IncV = Phi->getIncomingValue(LatchIdx); + if (getLoopPhiForCounter(IncV, L, DT) != Phi) + continue; + + const SCEV *Init = AR->getStart(); + + if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { + // Don't force a live loop counter if another IV can be used. + if (AlmostDeadIV(Phi, LatchBlock, Cond)) + continue; + + // Prefer to count-from-zero. This is a more "canonical" counter form. 
It + // also prefers integer to pointer IVs. + if (BestInit->isZero() != Init->isZero()) { + if (BestInit->isZero()) + continue; + } + // If two IVs both count from zero or both count from nonzero then the + // narrower is likely a dead phi that has been widened. Use the wider phi + // to allow the other to be eliminated. + if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) + continue; + } + BestPhi = Phi; + BestInit = Init; + } + return BestPhi; +} + +/// LinearFunctionTestReplace - This method rewrites the exit condition of the +/// loop to be a canonical != comparison against the incremented loop induction +/// variable. This pass is able to rewrite the exit tests of any loop where the +/// SCEV analysis can determine a loop-invariant trip count of the loop, which +/// is actually a much broader range than just linear tests. +Value *IndVarSimplify:: +LinearFunctionTestReplace(Loop *L, + const SCEV *BackedgeTakenCount, + PHINode *IndVar, + SCEVExpander &Rewriter) { + assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); + + // LFTR can ignore IV overflow and truncate to the width of + // BECount. This avoids materializing the add(zext(add)) expression. + Type *CntTy = !EnableIVRewrite ? + BackedgeTakenCount->getType() : IndVar->getType(); + + const SCEV *IVLimit = BackedgeTakenCount; + + // If the exiting block is not the same as the backedge block, we must compare + // against the preincremented value, otherwise we prefer to compare against + // the post-incremented value. + Value *CmpIndVar; + if (L->getExitingBlock() == L->getLoopLatch()) { + // Add one to the "backedge-taken" count to get the trip count. + // If this addition may overflow, we have to be more pessimistic and + // cast the induction variable before doing the add. 
+ const SCEV *N = + SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1)); + if (CntTy == IVLimit->getType()) + IVLimit = N; + else { + const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0); + if ((isa<SCEVConstant>(N) && !N->isZero()) || + SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { + // No overflow. Cast the sum. + IVLimit = SE->getTruncateOrZeroExtend(N, CntTy); + } else { + // Potential overflow. Cast before doing the add. + IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); + IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1)); + } + } + // The BackedgeTaken expression contains the number of times that the + // backedge branches to the loop header. This is one less than the + // number of times the loop executes, so use the incremented indvar. + CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); + } else { + // We have to use the preincremented value... + IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); + CmpIndVar = IndVar; + } + + // For unit stride, IVLimit = Start + BECount with 2's complement overflow. + // So for a non-zero start, compute the IVLimit here. + bool isPtrIV = false; + Type *CmpTy = CntTy; + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); + assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); + if (!AR->getStart()->isZero()) { + assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); + const SCEV *IVInit = AR->getStart(); + + // For pointer types, sign extend BECount in order to materialize a GEP. + // Note that without EnableIVRewrite, we never run SCEVExpander on a + // pointer type, because we must preserve the existing GEPs. Instead we + // directly generate a GEP later.
+ if (IVInit->getType()->isPointerTy()) { + isPtrIV = true; + CmpTy = SE->getEffectiveSCEVType(IVInit->getType()); + IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy); + } + // For integer types, truncate the IV before computing IVInit + BECount. + else { + if (SE->getTypeSizeInBits(IVInit->getType()) + > SE->getTypeSizeInBits(CmpTy)) + IVInit = SE->getTruncateExpr(IVInit, CmpTy); + + IVLimit = SE->getAddExpr(IVInit, IVLimit); + } + } + // Expand the code for the iteration count. + IRBuilder<> Builder(BI); + + assert(SE->isLoopInvariant(IVLimit, L) && + "Computed iteration count is not loop invariant!"); + Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI); + + // Create a gep for IVInit + IVLimit from an existing pointer base. + assert(isPtrIV == IndVar->getType()->isPointerTy() && + "IndVar type must match IVInit type"); + if (isPtrIV) { + Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); + assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter"); + assert(SE->getSizeOfExpr( + cast<PointerType>(IVStart->getType())->getElementType())->isOne() + && "unit stride pointer IV must be i8*"); + + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit"); + Builder.SetInsertPoint(BI); + } + + // Insert a new icmp_ne or icmp_eq instruction before the branch. + ICmpInst::Predicate P; + if (L->contains(BI->getSuccessor(0))) + P = ICmpInst::ICMP_NE; + else + P = ICmpInst::ICMP_EQ; + + DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" + << " LHS:" << *CmpIndVar << '\n' + << " op:\t" + << (P == ICmpInst::ICMP_NE ?
"!=" : "==") << "\n" + << " RHS:\t" << *ExitCnt << "\n" + << " Expr:\t" << *IVLimit << "\n"); + + if (SE->getTypeSizeInBits(CmpIndVar->getType()) + > SE->getTypeSizeInBits(CmpTy)) { + CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv"); + } + + Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); + Value *OrigCond = BI->getCondition(); + // It's tempting to use replaceAllUsesWith here to fully replace the old + // comparison, but that's not immediately safe, since users of the old + // comparison may not be dominated by the new comparison. Instead, just + // update the branch to use the new comparison; in the common case this + // will make old comparison dead. + BI->setCondition(Cond); + DeadInsts.push_back(OrigCond); + + ++NumLFTR; + Changed = true; + return Cond; +} + +//===----------------------------------------------------------------------===// +// SinkUnusedInvariants. A late subpass to cleanup loop preheaders. +//===----------------------------------------------------------------------===// + +/// If there's a single exit block, sink any loop-invariant values that +/// were defined in the preheader but not used inside the loop into the +/// exit block to reduce register pressure in the loop. +void IndVarSimplify::SinkUnusedInvariants(Loop *L) { + BasicBlock *ExitBlock = L->getExitBlock(); + if (!ExitBlock) return; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) return; + + Instruction *InsertPt = ExitBlock->getFirstInsertionPt(); + BasicBlock::iterator I = Preheader->getTerminator(); + while (I != Preheader->begin()) { + --I; + // New instructions were inserted at the end of the preheader. + if (isa<PHINode>(I)) + break; + + // Don't move instructions which might have side effects, since the side + // effects need to complete before instructions inside the loop. Also don't + // move instructions which might read memory, since the loop may modify + // memory. 
Note that it's okay if the instruction might have undefined + // behavior: LoopSimplify guarantees that the preheader dominates the exit + // block. + if (I->mayHaveSideEffects() || I->mayReadFromMemory()) + continue; + + // Skip debug info intrinsics. + if (isa<DbgInfoIntrinsic>(I)) + continue; + + // Skip landingpad instructions. + if (isa<LandingPadInst>(I)) + continue; + + // Don't sink static AllocaInsts out of the entry block, which would + // turn them into dynamic allocas! + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (AI->isStaticAlloca()) + continue; + + // Determine if there is a use in or before the loop (direct or + // otherwise). + bool UsedInLoop = false; + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + User *U = *UI; + BasicBlock *UseBB = cast<Instruction>(U)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(U)) { + unsigned i = + PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); + UseBB = P->getIncomingBlock(i); + } + if (UseBB == Preheader || L->contains(UseBB)) { + UsedInLoop = true; + break; + } + } + + // If there is, the def must remain in the preheader. + if (UsedInLoop) + continue; + + // Otherwise, sink it to the exit block. + Instruction *ToMove = I; + bool Done = false; + + if (I != Preheader->begin()) { + // Skip debug info intrinsics. + do { + --I; + } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); + + if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) + Done = true; + } else { + Done = true; + } + + ToMove->moveBefore(InsertPt); + if (Done) break; + InsertPt = ToMove; + } +} + +//===----------------------------------------------------------------------===// +// IndVarSimplify driver. Manage several subpasses of IV simplification. +//===----------------------------------------------------------------------===// + +bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { + // If LoopSimplify form is not available, stay out of trouble. 
Some notes: + // - LSR currently only supports LoopSimplify-form loops. Indvars' + // canonicalization can be a pessimization without LSR to "clean up" + // afterwards. + // - We depend on having a preheader; in particular, + // Loop::getCanonicalInductionVariable only supports loops with preheaders, + // and we're in trouble if we can't find the induction variable even when + // we've manually inserted one. + if (!L->isLoopSimplifyForm()) + return false; + + if (EnableIVRewrite) + IU = &getAnalysis<IVUsers>(); + LI = &getAnalysis<LoopInfo>(); + SE = &getAnalysis<ScalarEvolution>(); + DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); + + DeadInsts.clear(); + Changed = false; + + // If there are any floating-point recurrences, attempt to + // transform them to use integer recurrences. + RewriteNonIntegerIVs(L); + + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + + // Create a rewriter object which we'll use to transform the code with. + SCEVExpander Rewriter(*SE, "indvars"); +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif + + // Eliminate redundant IV users. + // + // Simplification works best when run before other consumers of SCEV. We + // attempt to avoid evaluating SCEVs for sign/zero extend operations until + // other expressions involving loop IVs have been evaluated. This helps SCEV + // set no-wrap flags before normalizing sign/zero extension. + if (!EnableIVRewrite) { + Rewriter.disableCanonicalMode(); + SimplifyAndExtend(L, Rewriter, LPM); + } + + // Check to see if this loop has a computable loop-invariant execution count. + // If so, this means that we can compute the final value of any expressions + // that are recurrent in the loop, and substitute the exit values from the + // loop into any instructions outside of the loop that use the final values of + // the current expressions. 
+ // + if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) + RewriteLoopExitValues(L, Rewriter); + + // Eliminate redundant IV users. + if (EnableIVRewrite) + Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts); + + // Eliminate redundant IV cycles. + if (!EnableIVRewrite) + NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); + + // Compute the type of the largest recurrence expression, and decide whether + // a canonical induction variable should be inserted. + Type *LargestType = 0; + bool NeedCannIV = false; + bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); + if (EnableIVRewrite && ExpandBECount) { + // If we have a known trip count and a single exit block, we'll be + // rewriting the loop exit test condition below, which requires a + // canonical induction variable. + NeedCannIV = true; + Type *Ty = BackedgeTakenCount->getType(); + if (!EnableIVRewrite) { + // In this mode, SimplifyIVUsers may have already widened the IV used by + // the backedge test and inserted a Trunc on the compare's operand. Get + // the wider type to avoid creating a redundant narrow IV only used by the + // loop test. + LargestType = getBackedgeIVType(L); + } + if (!LargestType || + SE->getTypeSizeInBits(Ty) > + SE->getTypeSizeInBits(LargestType)) + LargestType = SE->getEffectiveSCEVType(Ty); + } + if (EnableIVRewrite) { + for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + NeedCannIV = true; + Type *Ty = + SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); + if (!LargestType || + SE->getTypeSizeInBits(Ty) > + SE->getTypeSizeInBits(LargestType)) + LargestType = Ty; + } + } + + // Now that we know the largest of the induction variable expressions + // in this loop, insert a canonical induction variable of the largest size. + PHINode *IndVar = 0; + if (NeedCannIV) { + // Check to see if the loop already has any canonical-looking induction + // variables. 
If any are present and wider than the planned canonical + // induction variable, temporarily remove them, so that the Rewriter + // doesn't attempt to reuse them. + SmallVector<PHINode *, 2> OldCannIVs; + while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) { + if (SE->getTypeSizeInBits(OldCannIV->getType()) > + SE->getTypeSizeInBits(LargestType)) + OldCannIV->removeFromParent(); + else + break; + OldCannIVs.push_back(OldCannIV); + } + + IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType); + + ++NumInserted; + Changed = true; + DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n'); + + // Now that the official induction variable is established, reinsert + // any old canonical-looking variables after it so that the IR remains + // consistent. They will be deleted as part of the dead-PHI deletion at + // the end of the pass. + while (!OldCannIVs.empty()) { + PHINode *OldCannIV = OldCannIVs.pop_back_val(); + OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt()); + } + } + else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) { + IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD); + } + // If we have a trip count expression, rewrite the loop's exit condition + // using it. We can currently only handle loops with a single exit. + Value *NewICmp = 0; + if (ExpandBECount && IndVar) { + // Check preconditions for proper SCEVExpander operation. SCEV does not + // express SCEVExpander's dependencies, such as LoopSimplify. Instead any + // pass that uses the SCEVExpander must do it. This does not work well for + // loop passes because SCEVExpander makes assumptions about all loops, while + // LoopPassManager only forces the current loop to be simplified. + // + // FIXME: SCEV expansion has no way to bail out, so the caller must + // explicitly check any assumptions made by SCEV. Brittle. 
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount); + if (!AR || AR->getLoop()->getLoopPreheader()) + NewICmp = + LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); + } + // Rewrite IV-derived expressions. + if (EnableIVRewrite) + RewriteIVExpressions(L, Rewriter); + + // Clear the rewriter cache, because values that are in the rewriter's cache + // can be deleted in the loop below, causing the AssertingVH in the cache to + // trigger. + Rewriter.clear(); + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + + // The Rewriter may not be used from this point on. + + // Loop-invariant instructions in the preheader that aren't used in the + // loop may be sunk below the loop to reduce register pressure. + SinkUnusedInvariants(L); + + // For completeness, inform IVUsers of the IV use in the newly-created + // loop exit test instruction. + if (IU && NewICmp) { + ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp); + if (NewICmpInst) + IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0))); + } + // Clean up dead instructions. + Changed |= DeleteDeadPHIs(L->getHeader()); + // Check a post-condition. + assert(L->isLCSSAForm(*DT) && + "Indvars did not leave the loop in lcssa form!"); + + // Verify that LFTR, and any other change have not interfered with SCEV's + // ability to compute trip count. 
+#ifndef NDEBUG + if (!EnableIVRewrite && VerifyIndvars && + !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { + SE->forgetLoop(L); + const SCEV *NewBECount = SE->getBackedgeTakenCount(L); + if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < + SE->getTypeSizeInBits(NewBECount->getType())) + NewBECount = SE->getTruncateOrNoop(NewBECount, + BackedgeTakenCount->getType()); + else + BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, + NewBECount->getType()); + assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV"); + } +#endif + + return Changed; +}
diff --git a/src/LLVM/lib/Transforms/Scalar/JumpThreading.cpp b/src/LLVM/lib/Transforms/Scalar/JumpThreading.cpp new file mode 100644 index 0000000..f410af3 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/JumpThreading.cpp
@@ -0,0 +1,1594 @@ +//===- JumpThreading.cpp - Thread control through conditional blocks ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Jump Threading pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jump-threading" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumThreads, "Number of jumps threaded"); +STATISTIC(NumFolds, "Number of terminators folded"); +STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi"); + +static cl::opt<unsigned> +Threshold("jump-threading-threshold", + cl::desc("Max block size to duplicate for jump threading"), + cl::init(6), cl::Hidden); + +namespace { + // These are at global scope so static functions can use them too. 
+ typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo; + typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy; + + // This is used to keep track of what kind of constant we're currently hoping + // to find. + enum ConstantPreference { + WantInteger, + WantBlockAddress + }; + + /// This pass performs 'jump threading', which looks at blocks that have + /// multiple predecessors and multiple successors. If one or more of the + /// predecessors of the block can be proven to always jump to one of the + /// successors, we forward the edge from the predecessor to the successor by + /// duplicating the contents of this block. + /// + /// An example of when this can occur is code like this: + /// + /// if () { ... + /// X = 4; + /// } + /// if (X < 3) { + /// + /// In this case, the unconditional branch at the end of the first if can be + /// revectored to the false side of the second if. + /// + class JumpThreading : public FunctionPass { + TargetData *TD; + LazyValueInfo *LVI; +#ifdef NDEBUG + SmallPtrSet<BasicBlock*, 16> LoopHeaders; +#else + SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders; +#endif + DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet; + + // RAII helper for updating the recursion stack. 
+ struct RecursionSetRemover { + DenseSet<std::pair<Value*, BasicBlock*> > &TheSet; + std::pair<Value*, BasicBlock*> ThePair; + + RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S, + std::pair<Value*, BasicBlock*> P) + : TheSet(S), ThePair(P) { } + + ~RecursionSetRemover() { + TheSet.erase(ThePair); + } + }; + public: + static char ID; // Pass identification + JumpThreading() : FunctionPass(ID) { + initializeJumpThreadingPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LazyValueInfo>(); + AU.addPreserved<LazyValueInfo>(); + } + + void FindLoopHeaders(Function &F); + bool ProcessBlock(BasicBlock *BB); + bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs, + BasicBlock *SuccBB); + bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, + const SmallVectorImpl<BasicBlock *> &PredBBs); + + bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, + PredValueInfo &Result, + ConstantPreference Preference); + bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB, + ConstantPreference Preference); + + bool ProcessBranchOnPHI(PHINode *PN); + bool ProcessBranchOnXOR(BinaryOperator *BO); + + bool SimplifyPartiallyRedundantLoad(LoadInst *LI); + }; +} + +char JumpThreading::ID = 0; +INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", + "Jump Threading", false, false) +INITIALIZE_PASS_DEPENDENCY(LazyValueInfo) +INITIALIZE_PASS_END(JumpThreading, "jump-threading", + "Jump Threading", false, false) + +// Public interface to the Jump Threading pass +FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } + +/// runOnFunction - Top level algorithm. 
+/// +bool JumpThreading::runOnFunction(Function &F) { + DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); + TD = getAnalysisIfAvailable<TargetData>(); + LVI = &getAnalysis<LazyValueInfo>(); + + FindLoopHeaders(F); + + bool Changed, EverChanged = false; + do { + Changed = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E;) { + BasicBlock *BB = I; + // Thread all of the branches we can over this block. + while (ProcessBlock(BB)) + Changed = true; + + ++I; + + // If the block is trivially dead, zap it. This eliminates the successor + // edges which simplifies the CFG. + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) { + DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName() + << "' with terminator: " << *BB->getTerminator() << '\n'); + LoopHeaders.erase(BB); + LVI->eraseBlock(BB); + DeleteDeadBlock(BB); + Changed = true; + continue; + } + + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + + // Can't thread an unconditional jump, but if the block is "almost + // empty", we can replace uses of it with uses of the successor and make + // this dead. + if (BI && BI->isUnconditional() && + BB != &BB->getParent()->getEntryBlock() && + // If the terminator is the only non-phi instruction, try to nuke it. + BB->getFirstNonPHIOrDbg()->isTerminator()) { + // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the + // block, we have to make sure it isn't in the LoopHeaders set. We + // reinsert afterward if needed. + bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); + BasicBlock *Succ = BI->getSuccessor(0); + + // FIXME: It is always conservatively correct to drop the info + // for a block even if it doesn't get erased. This isn't totally + // awesome, but it allows us to use AssertingVH to prevent nasty + // dangling pointer issues within LazyValueInfo. 
+ LVI->eraseBlock(BB); + if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { + Changed = true; + // If we deleted BB and BB was the header of a loop, then the + // successor is now the header of the loop. + BB = Succ; + } + + if (ErasedFromLoopHeaders) + LoopHeaders.insert(BB); + } + } + EverChanged |= Changed; + } while (Changed); + + LoopHeaders.clear(); + return EverChanged; +} + +/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to +/// thread across it. +static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { + /// Ignore PHI nodes, these will be flattened when duplication happens. + BasicBlock::const_iterator I = BB->getFirstNonPHI(); + + // FIXME: THREADING will delete values that are just used to compute the + // branch, so they shouldn't count against the duplication cost. + + + // Sum up the cost of each instruction until we get to the terminator. Don't + // include the terminator because the copy won't include it. + unsigned Size = 0; + for (; !isa<TerminatorInst>(I); ++I) { + // Debugger intrinsics don't incur code size. + if (isa<DbgInfoIntrinsic>(I)) continue; + + // If this is a pointer->pointer bitcast, it is free. + if (isa<BitCastInst>(I) && I->getType()->isPointerTy()) + continue; + + // All other instructions count for at least one unit. + ++Size; + + // Calls are more expensive. If they are non-intrinsic calls, we model them + // as having cost of 4. If they are a non-vector intrinsic, we model them + // as having cost of 2 total, and if they are a vector intrinsic, we model + // them as having cost 1. + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + if (!isa<IntrinsicInst>(CI)) + Size += 3; + else if (!CI->getType()->isVectorTy()) + Size += 1; + } + } + + // Threading through a switch statement is particularly profitable. If this + // block ends in a switch, decrease its cost to make it more likely to happen. + if (isa<SwitchInst>(I)) + Size = Size > 6 ? 
Size-6 : 0; + + // The same holds for indirect branches, but slightly more so. + if (isa<IndirectBrInst>(I)) + Size = Size > 8 ? Size-8 : 0; + + return Size; +} + +/// FindLoopHeaders - We do not want jump threading to turn proper loop +/// structures into irreducible loops. Doing this breaks up the loop nesting +/// hierarchy and pessimizes later transformations. To prevent this from +/// happening, we first have to find the loop headers. Here we approximate this +/// by finding targets of backedges in the CFG. +/// +/// Note that there definitely are cases when we want to allow threading of +/// edges across a loop header. For example, threading a jump from outside the +/// loop (the preheader) to an exit block of the loop is definitely profitable. +/// It is also almost always profitable to thread backedges from within the loop +/// to exit blocks, and is often profitable to thread backedges to other blocks +/// within the loop (forming a nested loop). This simple analysis is not rich +/// enough to track all of these properties and keep it up-to-date as the CFG +/// mutates, so we don't allow any of these transformations. +/// +void JumpThreading::FindLoopHeaders(Function &F) { + SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; + FindFunctionBackedges(F, Edges); + + for (unsigned i = 0, e = Edges.size(); i != e; ++i) + LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); +} + +/// getKnownConstant - Helper method to determine if we can thread over a +/// terminator with the given value as its condition, and if so what value to +/// use for that. What kind of value this is depends on whether we want an +/// integer or a block address, but an undef is always accepted. +/// Returns null if Val is null or not an appropriate constant. +static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) { + if (!Val) + return 0; + + // Undef is "known" enough. 
+ if (UndefValue *U = dyn_cast<UndefValue>(Val)) + return U; + + if (Preference == WantBlockAddress) + return dyn_cast<BlockAddress>(Val->stripPointerCasts()); + + return dyn_cast<ConstantInt>(Val); +} + +/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see +/// if we can infer that the value is a known ConstantInt/BlockAddress or undef +/// in any of our predecessors. If so, return the known list of value and pred +/// BB in the result vector. +/// +/// This returns true if there were any known values. +/// +bool JumpThreading:: +ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, + ConstantPreference Preference) { + // This method walks up use-def chains recursively. Because of this, we could + // get into an infinite loop going around loops in the use-def chain. To + // prevent this, keep track of what (value, block) pairs we've already visited + // and terminate the search if we loop back to them. + if (!RecursionSet.insert(std::make_pair(V, BB)).second) + return false; + + // An RAII helper to remove this pair from the recursion set once the recursion + // stack pops back out again. + RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB)); + + // If V is a constant, then it is known in all predecessors. + if (Constant *KC = getKnownConstant(V, Preference)) { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(KC, *PI)); + + return true; + } + + // If V is a non-instruction value, or an instruction in a different block, + // then it can't be derived from a PHI. + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0 || I->getParent() != BB) { + + // Okay, if this is a live-in value, see if it has a known value at the end + // of any of our predecessors. + // + // FIXME: This should be an edge property, not a block end property. + /// TODO: Per PR2563, we could infer value range information about a + /// predecessor based on its terminator.
+ // + // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if + // "I" is a non-local compare-with-a-constant instruction. This would be + // able to handle value inequalities better, for example if the compare is + // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. + // Perhaps getConstantOnEdge should be smart enough to do this? + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. + Constant *PredCst = LVI->getConstantOnEdge(V, P, BB); + if (Constant *KC = getKnownConstant(PredCst, Preference)) + Result.push_back(std::make_pair(KC, P)); + } + + return !Result.empty(); + } + + /// If I is a PHI node, then we know the incoming values for any constants. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *InVal = PN->getIncomingValue(i); + if (Constant *KC = getKnownConstant(InVal, Preference)) { + Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i))); + } else { + Constant *CI = LVI->getConstantOnEdge(InVal, + PN->getIncomingBlock(i), BB); + if (Constant *KC = getKnownConstant(CI, Preference)) + Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i))); + } + } + + return !Result.empty(); + } + + PredValueInfoTy LHSVals, RHSVals; + + // Handle some boolean conditions. 
+ if (I->getType()->getPrimitiveSizeInBits() == 1) { + assert(Preference == WantInteger && "One-bit non-integer type?"); + // X | true -> true + // X & false -> false + if (I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::And) { + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, + WantInteger); + ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals, + WantInteger); + + if (LHSVals.empty() && RHSVals.empty()) + return false; + + ConstantInt *InterestingVal; + if (I->getOpcode() == Instruction::Or) + InterestingVal = ConstantInt::getTrue(I->getContext()); + else + InterestingVal = ConstantInt::getFalse(I->getContext()); + + SmallPtrSet<BasicBlock*, 4> LHSKnownBBs; + + // Scan for the sentinel. If we find an undef, force it to the + // interesting value: x|undef -> true and x&undef -> false. + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) + if (LHSVals[i].first == InterestingVal || + isa<UndefValue>(LHSVals[i].first)) { + Result.push_back(LHSVals[i]); + Result.back().first = InterestingVal; + LHSKnownBBs.insert(LHSVals[i].second); + } + for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) + if (RHSVals[i].first == InterestingVal || + isa<UndefValue>(RHSVals[i].first)) { + // If we already inferred a value for this block on the LHS, don't + // re-add it. + if (!LHSKnownBBs.count(RHSVals[i].second)) { + Result.push_back(RHSVals[i]); + Result.back().first = InterestingVal; + } + } + + return !Result.empty(); + } + + // Handle the NOT form of XOR. + if (I->getOpcode() == Instruction::Xor && + isa<ConstantInt>(I->getOperand(1)) && + cast<ConstantInt>(I->getOperand(1))->isOne()) { + ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result, + WantInteger); + if (Result.empty()) + return false; + + // Invert the known values. + for (unsigned i = 0, e = Result.size(); i != e; ++i) + Result[i].first = ConstantExpr::getNot(Result[i].first); + + return true; + } + + // Try to simplify some other binary operator values. 
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + assert(Preference != WantBlockAddress + && "A binary operator creating a block address?"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { + PredValueInfoTy LHSVals; + ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals, + WantInteger); + + // Try to use constant folding to simplify the binary operator. + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first; + Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); + + if (Constant *KC = getKnownConstant(Folded, WantInteger)) + Result.push_back(std::make_pair(KC, LHSVals[i].second)); + } + } + + return !Result.empty(); + } + + // Handle compare with phi operand, where the PHI is defined in this block. + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { + assert(Preference == WantInteger && "Compares only produce integers"); + PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0)); + if (PN && PN->getParent() == BB) { + // We can do this simplification if any comparisons fold to true or false. + // See if any do. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = PN->getIncomingBlock(i); + Value *LHS = PN->getIncomingValue(i); + Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); + + Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD); + if (Res == 0) { + if (!isa<Constant>(RHS)) + continue; + + LazyValueInfo::Tristate + ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS, + cast<Constant>(RHS), PredBB, BB); + if (ResT == LazyValueInfo::Unknown) + continue; + Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT); + } + + if (Constant *KC = getKnownConstant(Res, WantInteger)) + Result.push_back(std::make_pair(KC, PredBB)); + } + + return !Result.empty(); + } + + + // If comparing a live-in value against a constant, see if we know the + // live-in value on any predecessors. 
+ if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) { + if (!isa<Instruction>(Cmp->getOperand(0)) || + cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { + Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){ + BasicBlock *P = *PI; + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. + LazyValueInfo::Tristate Res = + LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), + RHSCst, P, BB); + if (Res == LazyValueInfo::Unknown) + continue; + + Constant *ResC = ConstantInt::get(Cmp->getType(), Res); + Result.push_back(std::make_pair(ResC, P)); + } + + return !Result.empty(); + } + + // Try to find a constant value for the LHS of a comparison, + // and evaluate it statically if we can. + if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) { + PredValueInfoTy LHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, + WantInteger); + + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first; + Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), + V, CmpConst); + if (Constant *KC = getKnownConstant(Folded, WantInteger)) + Result.push_back(std::make_pair(KC, LHSVals[i].second)); + } + + return !Result.empty(); + } + } + } + + if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + // Handle select instructions where at least one operand is a known constant + // and we can figure out the condition value for any predecessor block. 
+ Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference); + Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference); + PredValueInfoTy Conds; + if ((TrueVal || FalseVal) && + ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds, + WantInteger)) { + for (unsigned i = 0, e = Conds.size(); i != e; ++i) { + Constant *Cond = Conds[i].first; + + // Figure out what value to use for the condition. + bool KnownCond; + if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) { + // A known boolean. + KnownCond = CI->isOne(); + } else { + assert(isa<UndefValue>(Cond) && "Unexpected condition value"); + // Either operand will do, so be sure to pick the one that's a known + // constant. + // FIXME: Do this more cleverly if both values are known constants? + KnownCond = (TrueVal != 0); + } + + // See if the select has a known constant value for this predecessor. + if (Constant *Val = KnownCond ? TrueVal : FalseVal) + Result.push_back(std::make_pair(Val, Conds[i].second)); + } + + return !Result.empty(); + } + } + + // If all else fails, see if LVI can figure out a constant value for us. + Constant *CI = LVI->getConstant(V, BB); + if (Constant *KC = getKnownConstant(CI, Preference)) { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(KC, *PI)); + } + + return !Result.empty(); +} + + + +/// GetBestDestForJumpOnUndef - If we determine that the specified block ends +/// in an undefined jump, decide which block is best to revector to. +/// +/// Since we can pick an arbitrary destination, we pick the successor with the +/// fewest predecessors. This should reduce the in-degree of the others. +/// +static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { + TerminatorInst *BBTerm = BB->getTerminator(); + unsigned MinSucc = 0; + BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); + // Compute the successor with the minimum number of predecessors.
+ unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { + TestBB = BBTerm->getSuccessor(i); + unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + if (NumPreds < MinNumPreds) { + MinSucc = i; + MinNumPreds = NumPreds; + } + } + + return MinSucc; +} + +static bool hasAddressTakenAndUsed(BasicBlock *BB) { + if (!BB->hasAddressTaken()) return false; + + // If the block has its address taken, there may be a tree of dead constants + // hanging off of it. These shouldn't keep the block alive. + BlockAddress *BA = BlockAddress::get(BB); + BA->removeDeadConstantUsers(); + return !BA->use_empty(); +} + +/// ProcessBlock - If there are any predecessors whose control can be threaded +/// through to a successor, transform them now. +bool JumpThreading::ProcessBlock(BasicBlock *BB) { + // If the block is trivially dead, just return and let the caller nuke it. + // This simplifies other transformations. + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) + return false; + + // If this block has a single predecessor, and if that pred has a single + // successor, merge the blocks. This encourages recursive jump threading + // because now the condition in this block can be threaded through + // predecessors of our predecessor block. + if (BasicBlock *SinglePred = BB->getSinglePredecessor()) { + if (SinglePred->getTerminator()->getNumSuccessors() == 1 && + SinglePred != BB && !hasAddressTakenAndUsed(BB)) { + // If SinglePred was a loop header, BB becomes one. + if (LoopHeaders.erase(SinglePred)) + LoopHeaders.insert(BB); + + // Remember if SinglePred was the entry block of the function. If so, we + // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + LVI->eraseBlock(SinglePred); + MergeBasicBlockIntoOnlyPred(BB); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + return true; + } + } + + // What kind of constant we're looking for. + ConstantPreference Preference = WantInteger; + + // Look to see if the terminator is a conditional branch, switch or indirect + // branch, if not we can't thread it. + Value *Condition; + Instruction *Terminator = BB->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) { + // Can't thread an unconditional jump. + if (BI->isUnconditional()) return false; + Condition = BI->getCondition(); + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) { + Condition = SI->getCondition(); + } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) { + Condition = IB->getAddress()->stripPointerCasts(); + Preference = WantBlockAddress; + } else { + return false; // Must be an invoke. + } + + // Run constant folding to see if we can reduce the condition to a simple + // constant. + if (Instruction *I = dyn_cast<Instruction>(Condition)) { + Value *SimpleVal = ConstantFoldInstruction(I, TD); + if (SimpleVal) { + I->replaceAllUsesWith(SimpleVal); + I->eraseFromParent(); + Condition = SimpleVal; + } + } + + // If the terminator is branching on an undef, we can pick any of the + // successors to branch to. Let GetBestDestForJumpOnUndef decide. + if (isa<UndefValue>(Condition)) { + unsigned BestSucc = GetBestDestForJumpOnUndef(BB); + + // Fold the branch/switch. 
+ TerminatorInst *BBTerm = BB->getTerminator(); + for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { + if (i == BestSucc) continue; + BBTerm->getSuccessor(i)->removePredecessor(BB, true); + } + + DEBUG(dbgs() << " In block '" << BB->getName() + << "' folding undef terminator: " << *BBTerm << '\n'); + BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); + BBTerm->eraseFromParent(); + return true; + } + + // If the terminator of this block is branching on a constant, simplify the + // terminator to an unconditional branch. This can occur due to threading in + // other blocks. + if (getKnownConstant(Condition, Preference)) { + DEBUG(dbgs() << " In block '" << BB->getName() + << "' folding terminator: " << *BB->getTerminator() << '\n'); + ++NumFolds; + ConstantFoldTerminator(BB, true); + return true; + } + + Instruction *CondInst = dyn_cast<Instruction>(Condition); + + // All the rest of our checks depend on the condition being an instruction. + if (CondInst == 0) { + // FIXME: Unify this with code below. + if (ProcessThreadableEdges(Condition, BB, Preference)) + return true; + return false; + } + + + if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) { + // For a comparison where the LHS is outside this block, it's possible + // that we've branched on it before. Use LVI to see if we can simplify + // the branch based on that. + BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); + Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1)); + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (CondBr && CondConst && CondBr->isConditional() && PI != PE && + (!isa<Instruction>(CondCmp->getOperand(0)) || + cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) { + // For each predecessor edge, determine if the comparison is true or false + // on that edge. If they're all true or all false, we can simplify the + // branch. + // FIXME: We could handle mixed true/false by duplicating code.
+ LazyValueInfo::Tristate Baseline = + LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0), + CondConst, *PI, BB); + if (Baseline != LazyValueInfo::Unknown) { + // Check that all remaining incoming values match the first one. + while (++PI != PE) { + LazyValueInfo::Tristate Ret = + LVI->getPredicateOnEdge(CondCmp->getPredicate(), + CondCmp->getOperand(0), CondConst, *PI, BB); + if (Ret != Baseline) break; + } + + // If we terminated early, then one of the values didn't match. + if (PI == PE) { + unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0; + unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1; + CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true); + BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); + CondBr->eraseFromParent(); + return true; + } + } + } + } + + // Check for some cases that are worth simplifying. Right now we want to look + // for loads that are used by a switch or by the condition for the branch. If + // we see one, check to see if it's partially redundant. If so, insert a PHI + // which can then be used to thread the values. + // + Value *SimplifyValue = CondInst; + if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue)) + if (isa<Constant>(CondCmp->getOperand(1))) + SimplifyValue = CondCmp->getOperand(0); + + // TODO: There are other places where load PRE would be profitable, such as + // more complex comparisons. + if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue)) + if (SimplifyPartiallyRedundantLoad(LI)) + return true; + + + // Handle a variety of cases where we are branching on something derived from + // a PHI node in the current block. If we can prove that any predecessors + // compute a predictable value based on a PHI node, thread those predecessors. + // + if (ProcessThreadableEdges(CondInst, BB, Preference)) + return true; + + // If this is an otherwise-unfoldable branch on a phi node in the current + // block, see if we can simplify. 
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst)) + if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator())) + return ProcessBranchOnPHI(PN); + + + // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify. + if (CondInst->getOpcode() == Instruction::Xor && + CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator())) + return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst)); + + + // TODO: If we have: "br (X > 0)" and we have a predecessor where we know + // "(X == 4)", thread through this block. + + return false; +} + + +/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant +/// load instruction, eliminate it by replacing it with a PHI node. This is an +/// important optimization that encourages jump threading, and needs to be run +/// interlaced with other jump threading tasks. Returns true if LI was replaced and erased, false if it was left untouched. +bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { + // Don't hack volatile/atomic loads. + if (!LI->isSimple()) return false; + + // If the load is defined in a block with exactly one predecessor, it can't be + // partially redundant. + BasicBlock *LoadBB = LI->getParent(); + if (LoadBB->getSinglePredecessor()) + return false; + + Value *LoadedPtr = LI->getOperand(0); + + // If the loaded operand is defined in the LoadBB, it can't be available. + // TODO: Could do simple PHI translation, that would be fun :) + if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr)) + if (PtrOp->getParent() == LoadBB) + return false; + + // Scan a few instructions up from the load, to see if it is obviously live at + // the entry to its block. + BasicBlock::iterator BBIt = LI; + + if (Value *AvailableVal = + FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) { + // If the value of the load is locally available within the block, just use + // it. This frequently occurs for reg2mem'd allocas. + //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n"; + + // If the returned value is the load itself, replace with an undef. 
This can + // only happen in dead loops. + if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); + LI->replaceAllUsesWith(AvailableVal); + LI->eraseFromParent(); + return true; + } + + // Otherwise, if we scanned the whole block and got to the top of the block, + // we know the block is locally transparent to the load. If not, something + // might clobber its value. + if (BBIt != LoadBB->begin()) + return false; + + + SmallPtrSet<BasicBlock*, 8> PredsScanned; + typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy; + AvailablePredsTy AvailablePreds; + BasicBlock *OneUnavailablePred = 0; + + // If we got here, the loaded value is transparent through to the start of the + // block. Check to see if it is available in any of the predecessor blocks. + for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); + PI != PE; ++PI) { + BasicBlock *PredBB = *PI; + + // If we already scanned this predecessor, skip it. + if (!PredsScanned.insert(PredBB)) + continue; + + // Scan the predecessor to see if the value is available in the pred. + BBIt = PredBB->end(); + Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6); + if (!PredAvailable) { + OneUnavailablePred = PredBB; + continue; + } + + // If so, this load is partially redundant. Remember this info so that we + // can create a PHI node. + AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable)); + } + + // If the loaded value isn't available in any predecessor, it isn't partially + // redundant. + if (AvailablePreds.empty()) return false; + + // Okay, the loaded value is available in at least one (and maybe all!) + // predecessors. If the value is unavailable in more than one unique + // predecessor, we want to insert a merge block for those common predecessors. + // This ensures that we only have to insert one reload, thus not increasing + // code size. 
+ BasicBlock *UnavailablePred = 0; + + // If there is exactly one predecessor where the value is unavailable, the + // already computed 'OneUnavailablePred' block is it. If it ends in an + // unconditional branch, we know that it isn't a critical edge. + if (PredsScanned.size() == AvailablePreds.size()+1 && + OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) { + UnavailablePred = OneUnavailablePred; + } else if (PredsScanned.size() != AvailablePreds.size()) { + // Otherwise, we had multiple unavailable predecessors or we had a critical + // edge from the one. + SmallVector<BasicBlock*, 8> PredsToSplit; + SmallPtrSet<BasicBlock*, 8> AvailablePredSet; + + for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i) + AvailablePredSet.insert(AvailablePreds[i].first); + + // Add all the unavailable predecessors to the PredsToSplit list. + for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); + PI != PE; ++PI) { + BasicBlock *P = *PI; + // If the predecessor is an indirect goto, we can't split the edge. + if (isa<IndirectBrInst>(P->getTerminator())) + return false; + + if (!AvailablePredSet.count(P)) + PredsToSplit.push_back(P); + } + + // Split them out to their own block. + UnavailablePred = + SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), + "thread-pre-split", this); + } + + // If the value isn't available in all predecessors, then there will be + // exactly one where it isn't available. Insert a load on that edge and add + // it to the AvailablePreds list. 
+ if (UnavailablePred) { + assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 && + "Can't handle critical edge here!"); + LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false, + LI->getAlignment(), + UnavailablePred->getTerminator()); + NewVal->setDebugLoc(LI->getDebugLoc()); + AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); + } + + // Now we know that each predecessor of this block has a value in + // AvailablePreds, sort them for efficient access as we're walking the preds. + array_pod_sort(AvailablePreds.begin(), AvailablePreds.end()); + + // Create a PHI node at the start of the block for the PRE'd load value. + pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB); + PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", + LoadBB->begin()); + PN->takeName(LI); + PN->setDebugLoc(LI->getDebugLoc()); + + // Insert new entries into the PHI for each predecessor. A single block may + // have multiple entries here. + for (pred_iterator PI = PB; PI != PE; ++PI) { + BasicBlock *P = *PI; + AvailablePredsTy::iterator I = + std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), + std::make_pair(P, (Value*)0)); + + assert(I != AvailablePreds.end() && I->first == P && + "Didn't find entry for predecessor!"); + + PN->addIncoming(I->second, I->first); + } + + //cerr << "PRE: " << *LI << *PN << "\n"; + + LI->replaceAllUsesWith(PN); + LI->eraseFromParent(); + + return true; +} + +/// FindMostPopularDest - The specified list contains multiple possible +/// threadable destinations. Pick the one that occurs the most frequently in +/// the list. Null (undef) destinations are not counted; ties go to whichever tied block appears first in BB's successor list. +static BasicBlock * +FindMostPopularDest(BasicBlock *BB, + const SmallVectorImpl<std::pair<BasicBlock*, + BasicBlock*> > &PredToDestList) { + assert(!PredToDestList.empty()); + + // Determine popularity. 
If there are multiple possible destinations, we + // explicitly choose to ignore 'undef' destinations. 
We prefer to thread + // blocks with known and real destinations to threading undef. We'll handle + // them later if interesting. + DenseMap<BasicBlock*, unsigned> DestPopularity; + for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) + if (PredToDestList[i].second) + DestPopularity[PredToDestList[i].second]++; + + // Find the most popular dest. + DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); + BasicBlock *MostPopularDest = DPI->first; + unsigned Popularity = DPI->second; + SmallVector<BasicBlock*, 4> SamePopularity; + + for (++DPI; DPI != DestPopularity.end(); ++DPI) { + // If the popularity of this entry isn't higher than the popularity we've + // seen so far, ignore it. + if (DPI->second < Popularity) + ; // ignore. + else if (DPI->second == Popularity) { + // If it is the same as what we've seen so far, keep track of it. + SamePopularity.push_back(DPI->first); + } else { + // If it is more popular, remember it. + SamePopularity.clear(); + MostPopularDest = DPI->first; + Popularity = DPI->second; + } + } + + // Okay, now we know the most popular destination. If there is more than one + // destination, we need to determine one. This is arbitrary, but we need + // to make a deterministic decision. Pick the first one that appears in the + // successor list. + if (!SamePopularity.empty()) { + SamePopularity.push_back(MostPopularDest); + TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0; ; ++i) { + assert(i != TI->getNumSuccessors() && "Didn't find any successor!"); + + if (std::find(SamePopularity.begin(), SamePopularity.end(), + TI->getSuccessor(i)) == SamePopularity.end()) + continue; + + MostPopularDest = TI->getSuccessor(i); + break; + } + } + + // Okay, we have finally picked the most popular destination. 
+ return MostPopularDest; +} + +bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, + ConstantPreference Preference) { + // If threading this would thread across a loop header, don't even try to + // thread the edge. + if (LoopHeaders.count(BB)) + return false; + + PredValueInfoTy PredValues; + if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference)) + return false; + + assert(!PredValues.empty() && + "ComputeValueKnownInPredecessors returned true with no values"); + + DEBUG(dbgs() << "IN BB: " << *BB; + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { + dbgs() << " BB '" << BB->getName() << "': FOUND condition = " + << *PredValues[i].first + << " for pred '" << PredValues[i].second->getName() << "'.\n"; + }); + + // Decide what we want to thread through. Convert our list of known values to + // a list of known destinations for each pred. This also discards duplicate + // predecessors and keeps track of the undefined inputs (which are represented + // as a null dest in the PredToDestList). + SmallPtrSet<BasicBlock*, 16> SeenPreds; + SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList; + + BasicBlock *OnlyDest = 0; + BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { + BasicBlock *Pred = PredValues[i].second; + if (!SeenPreds.insert(Pred)) + continue; // Duplicate predecessor entry. + + // If the predecessor ends with an indirect goto, we can't change its + // destination. 
+ if (isa<IndirectBrInst>(Pred->getTerminator())) + continue; + + Constant *Val = PredValues[i].first; + + BasicBlock *DestBB; + if (isa<UndefValue>(Val)) + DestBB = 0; + else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) + DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero()); + else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) + DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val))); + else { + assert(isa<IndirectBrInst>(BB->getTerminator()) + && "Unexpected terminator"); + DestBB = cast<BlockAddress>(Val)->getBasicBlock(); + } + + // If we have exactly one destination, remember it for efficiency below. + if (PredToDestList.empty()) + OnlyDest = DestBB; + else if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + + PredToDestList.push_back(std::make_pair(Pred, DestBB)); + } + + // If all edges were unthreadable, we fail. + if (PredToDestList.empty()) + return false; + + // Determine which is the most common successor. If we have many inputs and + // this block is a switch, we want to start by threading the batch that goes + // to the most popular destination first. If we only know about one + // threadable destination (the common case) we can avoid this. + BasicBlock *MostPopularDest = OnlyDest; + + if (MostPopularDest == MultipleDestSentinel) + MostPopularDest = FindMostPopularDest(BB, PredToDestList); + + // Now that we know what the most popular destination is, factor all + // predecessors that will jump to it into a single predecessor. + SmallVector<BasicBlock*, 16> PredsToFactor; + for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) + if (PredToDestList[i].second == MostPopularDest) { + BasicBlock *Pred = PredToDestList[i].first; + + // This predecessor may be a switch or something else that has multiple + // edges to the block. Factor each of these edges by listing them + // according to # occurrences in PredsToFactor. 
+ TerminatorInst *PredTI = Pred->getTerminator(); + for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i) + if (PredTI->getSuccessor(i) == BB) + PredsToFactor.push_back(Pred); + } + + // If the threadable edges are branching on an undefined value, we get to pick + // the destination that these predecessors should get to. + if (MostPopularDest == 0) + MostPopularDest = BB->getTerminator()-> + getSuccessor(GetBestDestForJumpOnUndef(BB)); + + // Ok, try to thread it! + return ThreadEdge(BB, PredsToFactor, MostPopularDest); +} + +/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on +/// a PHI node in the current block. See if there are any simplifications we +/// can do based on inputs to the phi node. +/// +bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) { + BasicBlock *BB = PN->getParent(); + + // TODO: We could make use of this to do it once for blocks with common PHI + // values. + SmallVector<BasicBlock*, 1> PredBBs; + PredBBs.resize(1); + + // If any of the predecessor blocks end in an unconditional branch, we can + // *duplicate* the conditional branch into that block in order to further + // encourage jump threading and to eliminate cases where we have branch on a + // phi of an icmp (branch on icmp is much better). + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = PN->getIncomingBlock(i); + if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator())) + if (PredBr->isUnconditional()) { + PredBBs[0] = PredBB; + // Try to duplicate BB into PredBB. + if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs)) + return true; + } + } + + return false; +} + +/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on +/// a xor instruction in the current block. See if there are any +/// simplifications we can do based on inputs to the xor. 
+/// +bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { + BasicBlock *BB = BO->getParent(); + + // If either the LHS or RHS of the xor is a constant, don't do this + // optimization. + if (isa<ConstantInt>(BO->getOperand(0)) || + isa<ConstantInt>(BO->getOperand(1))) + return false; + + // If the first instruction in BB isn't a phi, we won't be able to infer + // anything special about any particular predecessor. + if (!isa<PHINode>(BB->front())) + return false; + + // If we have a xor as the branch input to this block, and we know that the + // LHS or RHS of the xor in any predecessor is true/false, then we can clone + // the condition into the predecessor and fix that value to true, saving some + // logical ops on that path and encouraging other paths to simplify. + // + // This copies something like this: + // + // BB: + // %X = phi i1 [1], [%X'] + // %Y = icmp eq i32 %A, %B + // %Z = xor i1 %X, %Y + // br i1 %Z, ... + // + // Into: + // BB': + // %Y = icmp ne i32 %A, %B + // br i1 %Z, ... + + PredValueInfoTy XorOpValues; + bool isLHS = true; + if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues, + WantInteger)) { + assert(XorOpValues.empty()); + if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues, + WantInteger)) + return false; + isLHS = false; + } + + assert(!XorOpValues.empty() && + "ComputeValueKnownInPredecessors returned true with no values"); + + // Scan the information to see which is most popular: true or false. The + // predecessors can be of the set true, false, or undef. + unsigned NumTrue = 0, NumFalse = 0; + for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { + if (isa<UndefValue>(XorOpValues[i].first)) + // Ignore undefs for the count. + continue; + if (cast<ConstantInt>(XorOpValues[i].first)->isZero()) + ++NumFalse; + else + ++NumTrue; + } + + // Determine which value to split on, true, false, or undef if neither. 
+ ConstantInt *SplitVal = 0; + if (NumTrue > NumFalse) + SplitVal = ConstantInt::getTrue(BB->getContext()); + else if (NumTrue != 0 || NumFalse != 0) + SplitVal = ConstantInt::getFalse(BB->getContext()); + + // Collect all of the blocks that this can be folded into so that we can + // factor this once and clone it once. + SmallVector<BasicBlock*, 8> BlocksToFoldInto; + for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { + if (XorOpValues[i].first != SplitVal && + !isa<UndefValue>(XorOpValues[i].first)) + continue; + + BlocksToFoldInto.push_back(XorOpValues[i].second); + } + + // If we inferred a value for all of the predecessors, then duplication won't + // help us. However, we can just replace the LHS or RHS with the constant. + if (BlocksToFoldInto.size() == + cast<PHINode>(BB->front()).getNumIncomingValues()) { + if (SplitVal == 0) { + // If all preds provide undef, just nuke the xor, because it is undef too. + BO->replaceAllUsesWith(UndefValue::get(BO->getType())); + BO->eraseFromParent(); + } else if (SplitVal->isZero()) { + // If all preds provide 0, replace the xor with the other input. + BO->replaceAllUsesWith(BO->getOperand(isLHS)); + BO->eraseFromParent(); + } else { + // If all preds provide 1, set the computed value to 1. + BO->setOperand(!isLHS, SplitVal); + } + + return true; + } + + // Try to duplicate BB into PredBB. + return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto); +} + + +/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new +/// predecessor to the PHIBB block. If it has PHI nodes, add entries for +/// NewPred using the entries from OldPred (suitably mapped). +static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, + BasicBlock *OldPred, + BasicBlock *NewPred, + DenseMap<Instruction*, Value*> &ValueMap) { + for (BasicBlock::iterator PNI = PHIBB->begin(); + PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) { + // Ok, we have a PHI node. Figure out what the incoming value was for the + // OldPred block. 
+ Value *IV = PN->getIncomingValueForBlock(OldPred); + + // Remap the value if necessary. + if (Instruction *Inst = dyn_cast<Instruction>(IV)) { + DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst); + if (I != ValueMap.end()) + IV = I->second; + } + + PN->addIncoming(IV, NewPred); + } +} + +/// ThreadEdge - We have decided that it is safe and profitable to factor the +/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB +/// across BB. Transform the IR to reflect this change. Returns false, without threading, if SuccBB is BB, BB is a loop header, or the duplication cost exceeds Threshold. +bool JumpThreading::ThreadEdge(BasicBlock *BB, + const SmallVectorImpl<BasicBlock*> &PredBBs, + BasicBlock *SuccBB) { + // If threading to the same block as we come from, we would infinite loop. + if (SuccBB == BB) { + DEBUG(dbgs() << " Not threading across BB '" << BB->getName() + << "' - would thread to self!\n"); + return false; + } + + // If threading this would thread across a loop header, don't thread the edge. + // See the comments above FindLoopHeaders for justifications and caveats. + if (LoopHeaders.count(BB)) { + DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName() + << "' to dest BB '" << SuccBB->getName() + << "' - it might create an irreducible loop!\n"); + return false; + } + + unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); + if (JumpThreadCost > Threshold) { + DEBUG(dbgs() << " Not threading BB '" << BB->getName() + << "' - Cost is too high: " << JumpThreadCost << "\n"); + return false; + } + + // And finally, do it! Start by factoring the predecessors if needed. + BasicBlock *PredBB; + if (PredBBs.size() == 1) + PredBB = PredBBs[0]; + else { + DEBUG(dbgs() << " Factoring out " << PredBBs.size() + << " common predecessors.\n"); + PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), + ".thr_comm", this); + } + + // And finally, do it! 
+ DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '" + << SuccBB->getName() << "' with cost: " << JumpThreadCost + << ", across block:\n " + << *BB << "\n"); + + LVI->threadEdge(PredBB, BB, SuccBB); + + // We are going to have to map operands from the original BB block to the new + // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to + // account for entry from PredBB. + DenseMap<Instruction*, Value*> ValueMapping; + + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), + BB->getName()+".thread", + BB->getParent(), BB); + NewBB->moveAfter(PredBB); + + BasicBlock::iterator BI = BB->begin(); + for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) + ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); + + // Clone the non-phi instructions of BB into NewBB, keeping track of the + // mapping and using it to remap operands in the cloned instructions. + for (; !isa<TerminatorInst>(BI); ++BI) { + Instruction *New = BI->clone(); + New->setName(BI->getName()); + NewBB->getInstList().push_back(New); + ValueMapping[BI] = New; + + // Remap operands to patch up intra-block references. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { + DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + New->setOperand(i, I->second); + } + } + + // We didn't copy the terminator from BB over to NewBB, because there is now + // an unconditional jump to SuccBB. Insert the unconditional jump. + BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB); + NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc()); + + // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the + // PHI nodes for NewBB now. 
+ AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping); + + // If there were values defined in BB that are used outside the block, then we + // now have to update all uses of the value to use either the original value, + // the cloned value, or some PHI derived value. This can require arbitrary + // PHI insertion, which we are prepared to do; clean these up now. + SSAUpdater SSAUpdate; + SmallVector<Use*, 16> UsesToRename; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + // Scan all uses of this instruction to see if it is used outside of its + // block, and if so, record them in UsesToRename. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (UserPN->getIncomingBlock(UI) == BB) + continue; + } else if (User->getParent() == BB) + continue; + + UsesToRename.push_back(&UI.getUse()); + } + + // If there are no uses outside the block, we're done with this instruction. + if (UsesToRename.empty()) + continue; + + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); + + // We found a use of I outside of BB. Rename all uses of I that are outside + // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks + // with the two values we know. + SSAUpdate.Initialize(I->getType(), I->getName()); + SSAUpdate.AddAvailableValue(BB, I); + SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); + + while (!UsesToRename.empty()) + SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); + DEBUG(dbgs() << "\n"); + } + + + // Ok, NewBB is good to go. Update the terminator of PredBB to jump to + // NewBB instead of BB. This eliminates predecessors from BB, which requires + // us to simplify any PHI nodes in BB. 
+ TerminatorInst *PredTerm = PredBB->getTerminator(); + for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) + if (PredTerm->getSuccessor(i) == BB) { + BB->removePredecessor(PredBB, true); + PredTerm->setSuccessor(i, NewBB); + } + + // At this point, the IR is fully up to date and consistent. Do a quick scan + // over the new instructions and zap any that are constants or dead. This + // frequently happens because of phi translation. + SimplifyInstructionsInBlock(NewBB, TD); + + // Threaded an edge! + ++NumThreads; + return true; +} + +/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch +/// to BB which contains an i1 PHI node and a conditional branch on that PHI. +/// If we can duplicate the contents of BB up into PredBB do so now, this +/// improves the odds that the branch will be on an analyzable instruction like +/// a compare. Returns false, without duplicating, if BB is a loop header or the duplication cost exceeds Threshold. +bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, + const SmallVectorImpl<BasicBlock *> &PredBBs) { + assert(!PredBBs.empty() && "Can't handle an empty set"); + + // If BB is a loop header, then duplicating this block outside the loop would + // cause us to transform this into an irreducible loop, don't do this. + // See the comments above FindLoopHeaders for justifications and caveats. + if (LoopHeaders.count(BB)) { + DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName() + << "' into predecessor block '" << PredBBs[0]->getName() + << "' - it might create an irreducible loop!\n"); + return false; + } + + unsigned DuplicationCost = getJumpThreadDuplicationCost(BB); + if (DuplicationCost > Threshold) { + DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() + << "' - Cost is too high: " << DuplicationCost << "\n"); + return false; + } + + // And finally, do it! Start by factoring the predecessors if needed. 
+ BasicBlock *PredBB; + if (PredBBs.size() == 1) + PredBB = PredBBs[0]; + else { + DEBUG(dbgs() << " Factoring out " << PredBBs.size() + << " common predecessors.\n"); + PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), + ".thr_comm", this); + } + + // Okay, we decided to do this! Clone all the instructions in BB onto the end + // of PredBB. + DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '" + << PredBB->getName() << "' to eliminate branch on phi. Cost: " + << DuplicationCost << " block is:" << *BB << "\n"); + + // Unless PredBB ends with an unconditional branch, split the edge so that we + // can just clone the bits from BB into the end of the new PredBB. + BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator()); + + if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) { + PredBB = SplitEdge(PredBB, BB, this); + OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); + } + + // We are going to have to map operands from the original BB block into the + // PredBB block. Evaluate PHI nodes in BB. + DenseMap<Instruction*, Value*> ValueMapping; + + BasicBlock::iterator BI = BB->begin(); + for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) + ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); + + // Clone the non-phi instructions of BB into PredBB, keeping track of the + // mapping and using it to remap operands in the cloned instructions. + for (; BI != BB->end(); ++BI) { + Instruction *New = BI->clone(); + + // Remap operands to patch up intra-block references. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { + DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + New->setOperand(i, I->second); + } + + // If this instruction can be simplified after the operands are updated, + // just use the simplified value instead. 
This frequently happens due to + // phi translation. + if (Value *IV = SimplifyInstruction(New, TD)) { + delete New; + ValueMapping[BI] = IV; + } else { + // Otherwise, insert the new instruction into the block. + New->setName(BI->getName()); + PredBB->getInstList().insert(OldPredBranch, New); + ValueMapping[BI] = New; + } + } + + // Check to see if the targets of the branch had PHI nodes. If so, we need to + // add entries to the PHI nodes for branch from PredBB now. + BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator()); + AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB, + ValueMapping); + AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB, + ValueMapping); + + // If there were values defined in BB that are used outside the block, then we + // now have to update all uses of the value to use either the original value, + // the cloned value, or some PHI derived value. This can require arbitrary + // PHI insertion, which we are prepared to do; clean these up now. + SSAUpdater SSAUpdate; + SmallVector<Use*, 16> UsesToRename; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + // Scan all uses of this instruction to see if it is used outside of its + // block, and if so, record them in UsesToRename. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (UserPN->getIncomingBlock(UI) == BB) + continue; + } else if (User->getParent() == BB) + continue; + + UsesToRename.push_back(&UI.getUse()); + } + + // If there are no uses outside the block, we're done with this instruction. + if (UsesToRename.empty()) + continue; + + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); + + // We found a use of I outside of BB. Rename all uses of I that are outside + // its block to be uses of the appropriate PHI node etc. 
See ValuesInBlocks + // with the two values we know. + SSAUpdate.Initialize(I->getType(), I->getName()); + SSAUpdate.AddAvailableValue(BB, I); + SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); + + while (!UsesToRename.empty()) + SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); + DEBUG(dbgs() << "\n"); + } + + // PredBB no longer jumps to BB, remove entries in the PHI node for the edge + // that we nuked. + BB->removePredecessor(PredBB, true); + + // Remove the unconditional branch at the end of the PredBB block. + OldPredBranch->eraseFromParent(); + + ++NumDupes; + return true; +} + +
diff --git a/src/LLVM/lib/Transforms/Scalar/LICM.cpp b/src/LLVM/lib/Transforms/Scalar/LICM.cpp index c655de5..b79bb13 100644 --- a/src/LLVM/lib/Transforms/Scalar/LICM.cpp +++ b/src/LLVM/lib/Transforms/Scalar/LICM.cpp
@@ -26,8 +26,7 @@ // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. We then use -// the mem2reg functionality to construct the appropriate SSA form for the -// variable. +// the SSAUpdater to construct the appropriate SSA form for the value. // //===----------------------------------------------------------------------===// @@ -37,14 +36,15 @@ #include "llvm/DerivedTypes.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -66,7 +66,9 @@ namespace { struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid - LICM() : LoopPass(ID) {} + LICM() : LoopPass(ID) { + initializeLICMPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -76,38 +78,30 @@ virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<DominatorTree>(); - AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg) AU.addRequired<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); AU.addRequired<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); - AU.addPreserved<DominanceFrontier>(); + AU.addPreserved<AliasAnalysis>(); + 
AU.addPreserved("scalar-evolution"); AU.addPreservedID(LoopSimplifyID); } bool doFinalization() { - // Free the values stored in the map - for (std::map<Loop *, AliasSetTracker *>::iterator - I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I) - delete I->second; - - LoopToAliasMap.clear(); + assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); return false; } private: - // Various analyses that we use... AliasAnalysis *AA; // Current AliasAnalysis information LoopInfo *LI; // Current LoopInfo - DominatorTree *DT; // Dominator Tree for the current Loop... - DominanceFrontier *DF; // Current Dominance Frontier + DominatorTree *DT; // Dominator Tree for the current Loop. - // State that is updated as we process loops + // State that is updated as we process loops. bool Changed; // Set to true when we change anything. BasicBlock *Preheader; // The preheader block of the current loop... Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... - std::map<Loop *, AliasSetTracker *> LoopToAliasMap; + DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); @@ -137,42 +131,7 @@ /// bool inSubLoop(BasicBlock *BB) { assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); - for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I) - if ((*I)->contains(BB)) - return true; // A subloop actually contains this block! - return false; - } - - /// isExitBlockDominatedByBlockInLoop - This method checks to see if the - /// specified exit block of the loop is dominated by the specified block - /// that is in the body of the loop. We use these constraints to - /// dramatically limit the amount of the dominator tree that needs to be - /// searched. 
- bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock, - BasicBlock *BlockInLoop) const { - // If the block in the loop is the loop header, it must be dominated! - BasicBlock *LoopHeader = CurLoop->getHeader(); - if (BlockInLoop == LoopHeader) - return true; - - DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop); - DomTreeNode *IDom = DT->getNode(ExitBlock); - - // Because the exit block is not in the loop, we know we have to get _at - // least_ its immediate dominator. - IDom = IDom->getIDom(); - - while (IDom && IDom != BlockInLoopNode) { - // If we have got to the header of the loop, then the instructions block - // did not dominate the exit node, so we can't hoist it. - if (IDom->getBlock() == LoopHeader) - return false; - - // Get next Immediate Dominator. - IDom = IDom->getIDom(); - }; - - return true; + return LI->getLoopFor(BB) != CurLoop; } /// sink - When an instruction is found to only be used outside of the loop, @@ -192,42 +151,39 @@ /// bool isSafeToExecuteUnconditionally(Instruction &I); + /// isGuaranteedToExecute - Check that the instruction is guaranteed to + /// execute. + /// + bool isGuaranteedToExecute(Instruction &I); + /// pointerInvalidatedByLoop - Return true if the body of this loop may /// store into the memory location pointed to by V. /// - bool pointerInvalidatedByLoop(Value *V, unsigned Size) { + bool pointerInvalidatedByLoop(Value *V, uint64_t Size, + const MDNode *TBAAInfo) { // Check to see if any of the basic blocks in CurLoop invalidate *V. - return CurAST->getAliasSetForPointer(V, Size).isMod(); + return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod(); } bool canSinkOrHoistInst(Instruction &I); - bool isLoopInvariantInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - /// PromoteValuesInLoop - Look at the stores in the loop and promote as many - /// to scalars as we can. 
- /// - void PromoteValuesInLoop(); - - /// FindPromotableValuesInLoop - Check the current loop for stores to - /// definite pointers, which are not loaded and stored through may aliases. - /// If these are found, create an alloca for the value, add it to the - /// PromotedValues list, and keep track of the mapping from value to - /// alloca... - /// - void FindPromotableValuesInLoop( - std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, - std::map<Value*, AllocaInst*> &Val2AlMap); + void PromoteAliasSet(AliasSet &AS); }; } char LICM::ID = 0; -INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false); +INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LICM(); } /// Hoist expressions out of the specified loop. Note, alias info for inner -/// loop is not preserved so it is not a good idea to run LICM multiple +/// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. /// bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -236,21 +192,25 @@ // Get our Loop and Alias Analysis information... LI = &getAnalysis<LoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); - DF = &getAnalysis<DominanceFrontier>(); DT = &getAnalysis<DominatorTree>(); CurAST = new AliasSetTracker(*AA); - // Collect Alias info from subloops + // Collect Alias info from subloops. 
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); LoopItr != LoopItrE; ++LoopItr) { Loop *InnerL = *LoopItr; - AliasSetTracker *InnerAST = LoopToAliasMap[InnerL]; - assert (InnerAST && "Where is my AST?"); + AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; + assert(InnerAST && "Where is my AST?"); // What if InnerLoop was modified by other passes ? CurAST->add(*InnerAST); + + // Once we've incorporated the inner loop's AST into ours, we don't need the + // subloop's anymore. + delete InnerAST; + LoopToAliasSetMap.erase(InnerL); } - + CurLoop = L; // Get the preheader block to move instructions into... @@ -263,7 +223,7 @@ for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops... + if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. CurAST->add(*BB); // Incorporate the specified basic block } @@ -283,15 +243,24 @@ HoistRegion(DT->getNode(L->getHeader())); // Now that all loop invariants have been removed from the loop, promote any - // memory references to scalars that we can... - if (!DisablePromotion && Preheader && L->hasDedicatedExits()) - PromoteValuesInLoop(); + // memory references to scalars that we can. + if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + // Loop over all of the alias sets in the tracker object. + for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); + I != E; ++I) + PromoteAliasSet(*I); + } // Clear out loops state information for the next iteration CurLoop = 0; Preheader = 0; - LoopToAliasMap[L] = CurAST; + // If this loop is nested inside of another one, save the alias information + // for when we process the outer loop. + if (L->getParentLoop()) + LoopToAliasSetMap[L] = CurAST; + else + delete CurAST; return Changed; } @@ -308,7 +277,7 @@ // If this subregion is not in the top level loop at all, exit. 
if (!CurLoop->contains(BB)) return; - // We are processing blocks in reverse dfo, so process children first... + // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) SinkRegion(Children[i]); @@ -320,6 +289,17 @@ for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { Instruction &I = *--II; + // If the instruction is dead, we would try to sink it because it isn't used + // in the loop, instead, just delete it. + if (isInstructionTriviallyDead(&I)) { + DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); + ++II; + CurAST->deleteValue(&I); + I.eraseFromParent(); + Changed = true; + continue; + } + // Check to see if we can sink this instruction to the exit blocks // of the loop. We can do this if the all users of the instruction are // outside of the loop. In this case, it doesn't even matter if the @@ -350,14 +330,26 @@ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { Instruction &I = *II++; + // Try constant folding this instruction. If all the operands are + // constants, it is technically hoistable, but it would be better to just + // fold it. + if (Constant *C = ConstantFoldInstruction(&I)) { + DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); + CurAST->copyValue(&I, C); + CurAST->deleteValue(&I); + I.replaceAllUsesWith(C); + I.eraseFromParent(); + continue; + } + // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. 
// - if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) && + if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) && isSafeToExecuteUnconditionally(I)) hoist(I); - } + } const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) @@ -370,25 +362,30 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { - if (LI->isVolatile()) - return false; // Don't hoist volatile loads! + if (!LI->isUnordered()) + return false; // Don't hoist volatile/atomic loads! // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; - + // Don't hoist loads which have may-aliased stores in loop. - unsigned Size = 0; + uint64_t Size = 0; if (LI->getType()->isSized()) Size = AA->getTypeStoreSize(LI->getType()); - return !pointerInvalidatedByLoop(LI->getOperand(0), Size); + return !pointerInvalidatedByLoop(LI->getOperand(0), Size, + LI->getMetadata(LLVMContext::MD_tbaa)); } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { - // Handle obvious cases efficiently. + // Don't sink or hoist dbg info; it's legal, but not useful. + if (isa<DbgInfoIntrinsic>(I)) + return false; + + // Handle simple cases by querying alias analysis. AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == AliasAnalysis::DoesNotAccessMemory) return true; - else if (Behavior == AliasAnalysis::OnlyReadsMemory) { + if (AliasAnalysis::onlyReadsMemory(Behavior)) { // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. bool FoundMod = false; @@ -437,20 +434,6 @@ } -/// isLoopInvariantInst - Return true if all operands of this instruction are -/// loop invariant. 
We also filter out non-hoistable instructions here just for -/// efficiency. -/// -bool LICM::isLoopInvariantInst(Instruction &I) { - // The instruction is loop invariant if all of its operands are loop-invariant - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) - if (!CurLoop->isLoopInvariant(I.getOperand(i))) - return false; - - // If we got this far, the instruction is loop invariant! - return true; -} - /// sink - When an instruction is found to only be used outside of the loop, /// this function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its @@ -460,7 +443,7 @@ DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); + CurLoop->getUniqueExitBlocks(ExitBlocks); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -471,128 +454,107 @@ // enough that we handle it as a special (more efficient) case. It is more // efficient to handle because there are no PHI nodes that need to be placed. if (ExitBlocks.size() == 1) { - if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) { + if (!DT->dominates(I.getParent(), ExitBlocks[0])) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. 
- I.removeFromParent(); - BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI(); - ExitBlocks[0]->getInstList().insert(InsertPt, &I); + I.moveBefore(ExitBlocks[0]->getFirstInsertionPt()); + + // This instruction is no longer in the AST for the current loop, because + // we just sunk it out of the loop. If we just sunk it into an outer + // loop, we will rediscover the operation when we process it. + CurAST->deleteValue(&I); } - } else if (ExitBlocks.empty()) { + return; + } + + if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); - } else { - // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to - // do all of the hard work of inserting PHI nodes as necessary. We convert - // the value into a stack object to get it to do this. - - // Firstly, we create a stack object to hold the value... - AllocaInst *AI = 0; - - if (!I.getType()->isVoidTy()) { - AI = new AllocaInst(I.getType(), 0, - I.getParent()->getParent()->getEntryBlock().begin()); - CurAST->add(AI); - } - - // Secondly, insert load instructions for each use of the instruction - // outside of the loop. - while (!I.use_empty()) { - Instruction *U = cast<Instruction>(I.use_back()); - - // If the user is a PHI Node, we actually have to insert load instructions - // in all predecessor blocks, not in the PHI block itself! - if (PHINode *UPN = dyn_cast<PHINode>(U)) { - // Only insert into each predecessor once, so that we don't have - // different incoming values from the same block! 
- std::map<BasicBlock*, Value*> InsertedBlocks; - for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i) - if (UPN->getIncomingValue(i) == &I) { - BasicBlock *Pred = UPN->getIncomingBlock(i); - Value *&PredVal = InsertedBlocks[Pred]; - if (!PredVal) { - // Insert a new load instruction right before the terminator in - // the predecessor block. - PredVal = new LoadInst(AI, Pred->getTerminator()); - CurAST->add(cast<LoadInst>(PredVal)); - } - - UPN->setIncomingValue(i, PredVal); - } - - } else { - LoadInst *L = new LoadInst(AI, U); - U->replaceUsesOfWith(&I, L); - CurAST->add(L); - } - } - - // Thirdly, insert a copy of the instruction in each exit block of the loop - // that is dominated by the instruction, storing the result into the memory - // location. Be careful not to insert the instruction into any particular - // basic block more than once. - std::set<BasicBlock*> InsertedBlocks; - BasicBlock *InstOrigBB = I.getParent(); - - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *ExitBlock = ExitBlocks[i]; - - if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) { - // If we haven't already processed this exit block, do so now. - if (InsertedBlocks.insert(ExitBlock).second) { - // Insert the code after the last PHI node... - BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); - - // If this is the first exit block processed, just move the original - // instruction, otherwise clone the original instruction and insert - // the copy. 
- Instruction *New; - if (InsertedBlocks.size() == 1) { - I.removeFromParent(); - ExitBlock->getInstList().insert(InsertPt, &I); - New = &I; - } else { - New = I.clone(); - CurAST->copyValue(&I, New); - if (!I.getName().empty()) - New->setName(I.getName()+".le"); - ExitBlock->getInstList().insert(InsertPt, New); - } - - // Now that we have inserted the instruction, store it into the alloca - if (AI) new StoreInst(New, AI, InsertPt); - } - } - } - - // If the instruction doesn't dominate any exit blocks, it must be dead. - if (InsertedBlocks.empty()) { - CurAST->deleteValue(&I); - I.eraseFromParent(); - } - - // Finally, promote the fine value to SSA form. - if (AI) { - std::vector<AllocaInst*> Allocas; - Allocas.push_back(AI); - PromoteMemToReg(Allocas, *DT, *DF, CurAST); - } + return; } + + // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the + // hard work of inserting PHI nodes as necessary. + SmallVector<PHINode*, 8> NewPHIs; + SSAUpdater SSA(&NewPHIs); + + if (!I.use_empty()) + SSA.Initialize(I.getType(), I.getName()); + + // Insert a copy of the instruction in each exit block of the loop that is + // dominated by the instruction. Each exit block is known to only be in the + // ExitBlocks list once. + BasicBlock *InstOrigBB = I.getParent(); + unsigned NumInserted = 0; + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + + if (!DT->dominates(InstOrigBB, ExitBlock)) + continue; + + // Insert the code after the last PHI node. + BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); + + // If this is the first exit block processed, just move the original + // instruction, otherwise clone the original instruction and insert + // the copy. 
+ Instruction *New; + if (NumInserted++ == 0) { + I.moveBefore(InsertPt); + New = &I; + } else { + New = I.clone(); + if (!I.getName().empty()) + New->setName(I.getName()+".le"); + ExitBlock->getInstList().insert(InsertPt, New); + } + + // Now that we have inserted the instruction, inform SSAUpdater. + if (!I.use_empty()) + SSA.AddAvailableValue(ExitBlock, New); + } + + // If the instruction doesn't dominate any exit blocks, it must be dead. + if (NumInserted == 0) { + CurAST->deleteValue(&I); + if (!I.use_empty()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.eraseFromParent(); + return; + } + + // Next, rewrite uses of the instruction, inserting PHI nodes as needed. + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { + // Grab the use before incrementing the iterator. + Use &U = UI.getUse(); + // Increment the iterator before removing the use from the list. + ++UI; + SSA.RewriteUseAfterInsertions(U); + } + + // Update CurAST for NewPHIs if I had pointer type. + if (I.getType()->isPointerTy()) + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(&I, NewPHIs[i]); + + // Finally, remove the instruction from CurAST. It is no longer in the loop. + CurAST->deleteValue(&I); } /// hoist - When an instruction is found to only use loop invariant operands @@ -602,12 +564,8 @@ DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); - // Remove the instruction from its current basic block... but don't delete the - // instruction. - I.removeFromParent(); - - // Insert the new node in Preheader, before the terminator. - Preheader->getInstList().insert(Preheader->getTerminator(), &I); + // Move the new node to the Preheader, before its terminator. 
+ I.moveBefore(Preheader->getTerminator()); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -624,6 +582,10 @@ if (Inst.isSafeToSpeculativelyExecute()) return true; + return isGuaranteedToExecute(Inst); +} + +bool LICM::isGuaranteedToExecute(Instruction &Inst) { // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. @@ -638,232 +600,213 @@ SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); - // For each exit block, get the DT node and walk up the DT until the - // instruction's basic block is found or we exit the loop. + // Verify that the block dominates each of the exit blocks of the loop. for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent())) + if (!DT->dominates(Inst.getParent(), ExitBlocks[i])) return false; return true; } +namespace { + class LoopPromoter : public LoadAndStorePromoter { + Value *SomePtr; // Designated pointer to store to. 
+ SmallPtrSet<Value*, 4> &PointerMustAliases; + SmallVectorImpl<BasicBlock*> &LoopExitBlocks; + AliasSetTracker &AST; + DebugLoc DL; + int Alignment; + public: + LoopPromoter(Value *SP, + const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, + SmallPtrSet<Value*, 4> &PMA, + SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, + DebugLoc dl, int alignment) + : LoadAndStorePromoter(Insts, S), SomePtr(SP), + PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), + Alignment(alignment) {} -/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking + virtual bool isInstInList(Instruction *I, + const SmallVectorImpl<Instruction*> &) const { + Value *Ptr; + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + Ptr = LI->getOperand(0); + else + Ptr = cast<StoreInst>(I)->getPointerOperand(); + return PointerMustAliases.count(Ptr); + } + + virtual void doExtraRewritesBeforeFinalDeletion() const { + // Insert stores after in the loop exit blocks. Each exit block gets a + // store of the live-out values that feed them. Since we've already told + // the SSA updater about the defs in the loop and the preheader + // definition, it is all set and we can start using it. + for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = LoopExitBlocks[i]; + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); + StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); + NewSI->setAlignment(Alignment); + NewSI->setDebugLoc(DL); + } + } + + virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const { + // Update alias analysis. + AST.copyValue(LI, V); + } + virtual void instructionDeleted(Instruction *I) const { + AST.deleteValue(I); + } + }; +} // end anon namespace + +/// PromoteAliasSet - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. 
We do this by /// looping over the stores in the loop, looking for stores to Must pointers -/// which are loop invariant. We promote these memory locations to use allocas -/// instead. These allocas can easily be raised to register values by the -/// PromoteMem2Reg functionality. +/// which are loop invariant. /// -void LICM::PromoteValuesInLoop() { - // PromotedValues - List of values that are promoted out of the loop. Each - // value has an alloca instruction for it, and a canonical version of the - // pointer. - std::vector<std::pair<AllocaInst*, Value*> > PromotedValues; - std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca +void LICM::PromoteAliasSet(AliasSet &AS) { + // We can promote this alias set if it has a store, if it is a "Must" alias + // set, if the pointer is loop invariant, and if we are not eliminating any + // volatile loads or stores. + if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) + return; - FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap); - if (ValueToAllocaMap.empty()) return; // If there are values to promote. + assert(!AS.empty() && + "Must alias set should have at least one pointer element in it!"); + Value *SomePtr = AS.begin()->getValue(); - Changed = true; - NumPromoted += PromotedValues.size(); - - std::vector<Value*> PointerValueNumbers; - - // Emit a copy from the value into the alloca'd value in the loop preheader - TerminatorInst *LoopPredInst = Preheader->getTerminator(); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - Value *Ptr = PromotedValues[i].second; - - // If we are promoting a pointer value, update alias information for the - // inserted load. - Value *LoadValue = 0; - if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) { - // Locate a load or store through the pointer, and assign the same value - // to LI as we are loading or storing. 
Since we know that the value is - // stored in this loop, this will always succeed. - for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - LoadValue = LI; - break; - } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(1) == Ptr) { - LoadValue = SI->getOperand(0); - break; - } - } - } - assert(LoadValue && "No store through the pointer found!"); - PointerValueNumbers.push_back(LoadValue); // Remember this for later. - } - - // Load from the memory we are promoting. - LoadInst *LI = new LoadInst(Ptr, LoopPredInst); - - if (LoadValue) CurAST->copyValue(LoadValue, LI); - - // Store into the temporary alloca. - new StoreInst(LI, PromotedValues[i].first, LoopPredInst); - } - - // Scan the basic blocks in the loop, replacing uses of our pointers with - // uses of the allocas in question. + // It isn't safe to promote a load/store from the loop if the load/store is + // conditional. For example, turning: // - for (Loop::block_iterator I = CurLoop->block_begin(), - E = CurLoop->block_end(); I != E; ++I) { - BasicBlock *BB = *I; - // Rewrite all loads and stores in the block of the pointer... - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (LoadInst *L = dyn_cast<LoadInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(L->getOperand(0)); - if (I != ValueToAllocaMap.end()) - L->setOperand(0, I->second); // Rewrite load instruction... - } else if (StoreInst *S = dyn_cast<StoreInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(S->getOperand(1)); - if (I != ValueToAllocaMap.end()) - S->setOperand(1, I->second); // Rewrite store instruction... - } - } - } - - // Now that the body of the loop uses the allocas instead of the original - // memory locations, insert code to copy the alloca value back into the - // original memory location on all exits from the loop. 
Note that we only - // want to insert one copy of the code in each exit block, though the loop may - // exit to the same block more than once. + // for () { if (c) *P += 1; } // - SmallPtrSet<BasicBlock*, 16> ProcessedBlocks; - - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - if (!ProcessedBlocks.insert(ExitBlocks[i])) - continue; - - // Copy all of the allocas into their memory locations. - BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI(); - Instruction *InsertPos = BI; - unsigned PVN = 0; - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - // Load from the alloca. - LoadInst *LI = new LoadInst(PromotedValues[i].first, InsertPos); - - // If this is a pointer type, update alias info appropriately. - if (LI->getType()->isPointerTy()) - CurAST->copyValue(PointerValueNumbers[PVN++], LI); - - // Store into the memory we promoted. - new StoreInst(LI, PromotedValues[i].second, InsertPos); - } - } - - // Now that we have done the deed, use the mem2reg functionality to promote - // all of the new allocas we just created into real SSA registers. + // into: // - std::vector<AllocaInst*> PromotedAllocas; - PromotedAllocas.reserve(PromotedValues.size()); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) - PromotedAllocas.push_back(PromotedValues[i].first); - PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); -} + // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; + // + // is not safe, because *P may only be valid to access if 'c' is true. + // + // It is safe to promote P if all uses are direct load/stores and if at + // least one is guaranteed to be executed. + bool GuaranteedToExecute = false; -/// FindPromotableValuesInLoop - Check the current loop for stores to definite -/// pointers, which are not loaded and stored through may aliases and are safe -/// for promotion. 
If these are found, create an alloca for the value, add it -/// to the PromotedValues list, and keep track of the mapping from value to -/// alloca. -void LICM::FindPromotableValuesInLoop( - std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, - std::map<Value*, AllocaInst*> &ValueToAllocaMap) { - Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin(); + SmallVector<Instruction*, 64> LoopUses; + SmallPtrSet<Value*, 4> PointerMustAliases; - // Loop over all of the alias sets in the tracker object. - for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); - I != E; ++I) { - AliasSet &AS = *I; - // We can promote this alias set if it has a store, if it is a "Must" alias - // set, if the pointer is loop invariant, and if we are not eliminating any - // volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) - continue; - - assert(!AS.empty() && - "Must alias set should have at least one pointer element in it!"); - Value *V = AS.begin()->getValue(); + // We start with an alignment of one and try to find instructions that allow + // us to prove better alignment. + unsigned Alignment = 1; + + // Check that all of the pointers in the alias set have the same type. We + // cannot (yet) promote a memory location that is loaded and stored in + // different sizes. + for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { + Value *ASIV = ASI->getValue(); + PointerMustAliases.insert(ASIV); // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. 
- { - bool PointerOk = true; - for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) - if (V->getType() != I->getValue()->getType()) { - PointerOk = false; - break; - } - if (!PointerOk) - continue; - } + if (SomePtr->getType() != ASIV->getType()) + return; - // It isn't safe to promote a load/store from the loop if the load/store is - // conditional. For example, turning: - // - // for () { if (c) *P += 1; } - // - // into: - // - // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; - // - // is not safe, because *P may only be valid to access if 'c' is true. - // - // It is safe to promote P if all uses are direct load/stores and if at - // least one is guaranteed to be executed. - bool GuaranteedToExecute = false; - bool InvalidInst = false; - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); + for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); UI != UE; ++UI) { - // Ignore instructions not in this loop. + // Ignore instructions that are outside the loop. Instruction *Use = dyn_cast<Instruction>(*UI); if (!Use || !CurLoop->contains(Use)) continue; - if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) { - InvalidInst = true; - break; - } - - if (!GuaranteedToExecute) - GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); + // If there is an non-load/store instruction in the loop, we can't promote + // it. + if (LoadInst *load = dyn_cast<LoadInst>(Use)) { + assert(!load->isVolatile() && "AST broken"); + if (!load->isSimple()) + return; + } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) { + // Stores *of* the pointer are not interesting, only stores *to* the + // pointer. + if (Use->getOperand(1) != ASIV) + continue; + assert(!store->isVolatile() && "AST broken"); + if (!store->isSimple()) + return; + + // Note that we only check GuaranteedToExecute inside the store case + // so that we do not introduce stores where they did not exist before + // (which would break the LLVM concurrency model). 
+ + // If the alignment of this instruction allows us to specify a more + // restrictive (and performant) alignment and if we are sure this + // instruction will be executed, update the alignment. + // Larger is better, with the exception of 0 being the best alignment. + unsigned InstAlignment = store->getAlignment(); + if ((InstAlignment > Alignment || InstAlignment == 0) + && (Alignment != 0)) + if (isGuaranteedToExecute(*Use)) { + GuaranteedToExecute = true; + Alignment = InstAlignment; + } + + if (!GuaranteedToExecute) + GuaranteedToExecute = isGuaranteedToExecute(*Use); + + } else + return; // Not a load or store. + + LoopUses.push_back(Use); } - - // If there is an non-load/store instruction in the loop, we can't promote - // it. If there isn't a guaranteed-to-execute instruction, we can't - // promote. - if (InvalidInst || !GuaranteedToExecute) - continue; - - const Type *Ty = cast<PointerType>(V->getType())->getElementType(); - AllocaInst *AI = new AllocaInst(Ty, 0, FnStart); - PromotedValues.push_back(std::make_pair(AI, V)); - - // Update the AST and alias analysis. - CurAST->copyValue(V, AI); - - for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) - ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); - - DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n"); } + + // If there isn't a guaranteed-to-execute instruction, we can't promote. + if (!GuaranteedToExecute) + return; + + // Otherwise, this is safe to promote, lets do it! + DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); + Changed = true; + ++NumPromoted; + + // Grab a debug location for the inserted loads/stores; given that the + // inserted loads/stores have little relation to the original loads/stores, + // this code just arbitrarily picks a location from one, since any debug + // location is better than none. 
+ DebugLoc DL = LoopUses[0]->getDebugLoc(); + + SmallVector<BasicBlock*, 8> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + + // We use the SSAUpdater interface to insert phi nodes as required. + SmallVector<PHINode*, 16> NewPHIs; + SSAUpdater SSA(&NewPHIs); + LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, + *CurAST, DL, Alignment); + + // Set up the preheader to have a definition of the value. It is the live-out + // value from the preheader that uses in the loop will use. + LoadInst *PreheaderLoad = + new LoadInst(SomePtr, SomePtr->getName()+".promoted", + Preheader->getTerminator()); + PreheaderLoad->setAlignment(Alignment); + PreheaderLoad->setDebugLoc(DL); + SSA.AddAvailableValue(Preheader, PreheaderLoad); + + // Rewrite all the loads in the loop and remember all the definitions from + // stores in the loop. + Promoter.run(LoopUses); + + // If the SSAUpdater didn't use the load in the preheader, just zap it now. + if (PreheaderLoad->use_empty()) + PreheaderLoad->eraseFromParent(); } + /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { - AliasSetTracker *AST = LoopToAliasMap[L]; + AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); if (!AST) return; @@ -873,7 +816,7 @@ /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias /// set. void LICM::deleteAnalysisValue(Value *V, Loop *L) { - AliasSetTracker *AST = LoopToAliasMap[L]; + AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); if (!AST) return;
diff --git a/src/LLVM/lib/Transforms/Scalar/LLVMScalarOpts.vcxproj b/src/LLVM/lib/Transforms/Scalar/LLVMScalarOpts.vcxproj new file mode 100644 index 0000000..c092c10 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/LLVMScalarOpts.vcxproj
@@ -0,0 +1,375 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Debug|x64"> + <Configuration>Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|Win32"> + <Configuration>Profile</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|x64"> + <Configuration>Profile</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|x64"> + <Configuration>Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{A3C67D8F-E19A-46EF-91AB-C7840FE2B97C}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>LLVMScalarOpts</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + 
<ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir> + 
<TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.lib</TargetExt> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</IntDir> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">.lib</TargetExt> + 
</PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Debug/LLVMScalarOpts.pdb</ProgramDataBaseFileName> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + 
<AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + 
<ProgramDataBaseFileName>..\..\Debug/LLVMScalarOpts.pdb</ProgramDataBaseFileName> + <OmitFramePointers>false</OmitFramePointers> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMScalarOpts.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMScalarOpts.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMScalarOpts.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Profile|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMScalarOpts.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemGroup> + <ClCompile Include="ADCE.cpp" /> + <ClCompile Include="CodeGenPrepare.cpp" /> + <ClCompile Include="DeadStoreElimination.cpp" /> + <ClCompile Include="GVN.cpp" /> + <ClCompile Include="LICM.cpp" /> + <ClCompile Include="LoopStrengthReduce.cpp" /> + <ClCompile Include="Reassociate.cpp" /> + <ClCompile Include="Reg2Mem.cpp" /> + <ClCompile Include="SCCP.cpp" /> + <ClCompile Include="ScalarReplAggregates.cpp" /> + <ClCompile Include="SimplifyCFGPass.cpp" /> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\Analysis/LLVMAnalysis.vcxproj"> + <Project>97EDF19C-6360-4770-9255-EBA2F1A13E9B</Project> + </ProjectReference> + <ProjectReference Include="..\..\VMCore/LLVMCore.vcxproj"> + <Project>00F3295C-F7A0-43D3-BD0B-1BC0515B30E1</Project> + </ProjectReference> + <ProjectReference Include="..\InstCombine/LLVMInstCombine.vcxproj"> + <Project>D35C7204-D4E0-4EE5-8B6D-BA1B589F5D36</Project> + </ProjectReference> + <ProjectReference Include="..\..\Support/LLVMSupport.vcxproj"> + 
<Project>C688DD59-C6CB-4B33-B56F-A7D6F3761524</Project> + </ProjectReference> + <ProjectReference Include="..\..\Target/LLVMTarget.vcxproj"> + <Project>76F7B8C2-C825-40DC-BB68-9D987275E320</Project> + </ProjectReference> + <ProjectReference Include="..\Utils/LLVMTransformUtils.vcxproj"> + <Project>5C514254-58EE-4850-8743-F5D7BEAA3E66</Project> + </ProjectReference> + <ProjectReference Include="..\..\..\include/llvm/intrinsics_gen.vcxproj"> + <Project>E9B87B46-1EB0-4D95-9049-41B148FBADCD</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Scalar/LLVMScalarOpts.vcxproj.filters b/src/LLVM/lib/Transforms/Scalar/LLVMScalarOpts.vcxproj.filters new file mode 100644 index 0000000..2822d60 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/LLVMScalarOpts.vcxproj.filters
@@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <ClCompile Include="ADCE.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="CodeGenPrepare.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="DeadStoreElimination.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="GVN.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="LICM.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="LoopStrengthReduce.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="Reassociate.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="Reg2Mem.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="SCCP.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="ScalarReplAggregates.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="SimplifyCFGPass.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{1733179C-6FE4-462E-9EA5-4A29A1ACFE25}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Scalar/LoopDeletion.cpp b/src/LLVM/lib/Transforms/Scalar/LoopDeletion.cpp new file mode 100644 index 0000000..f7f3298 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -0,0 +1,248 @@ +//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Dead Loop Deletion Pass. This pass is responsible +// for eliminating loops with non-infinite computable trip counts that have no +// side effects or volatile instructions, and do not contribute to the +// computation of the function's return value. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-delete" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +STATISTIC(NumDeleted, "Number of loops deleted"); + +namespace { + class LoopDeletion : public LoopPass { + public: + static char ID; // Pass ID, replacement for typeid + LoopDeletion() : LoopPass(ID) { + initializeLoopDeletionPass(*PassRegistry::getPassRegistry()); + } + + // Possibly eliminate loop L if it is dead. 
+ bool runOnLoop(Loop* L, LPPassManager& LPM); + + bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks, + SmallVector<BasicBlock*, 4>& exitBlocks, + bool &Changed, BasicBlock *Preheader); + + virtual void getAnalysisUsage(AnalysisUsage& AU) const { + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<DominatorTree>(); + AU.addPreserved<LoopInfo>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreservedID(LCSSAID); + } + }; +} + +char LoopDeletion::ID = 0; +INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion", + "Delete dead loops", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_END(LoopDeletion, "loop-deletion", + "Delete dead loops", false, false) + +Pass* llvm::createLoopDeletionPass() { + return new LoopDeletion(); +} + +/// IsLoopDead - Determine if a loop is dead. This assumes that we've already +/// checked for unique exit and exiting blocks, and that the code is in LCSSA +/// form. +bool LoopDeletion::IsLoopDead(Loop* L, + SmallVector<BasicBlock*, 4>& exitingBlocks, + SmallVector<BasicBlock*, 4>& exitBlocks, + bool &Changed, BasicBlock *Preheader) { + BasicBlock* exitBlock = exitBlocks[0]; + + // Make sure that all PHI entries coming from the loop are loop invariant. + // Because the code is in LCSSA form, any values used outside of the loop + // must pass through a PHI in the exit block, meaning that this check is + // sufficient to guarantee that no loop-variant values are used outside + // of the loop. 
+ BasicBlock::iterator BI = exitBlock->begin(); + while (PHINode* P = dyn_cast<PHINode>(BI)) { + Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]); + + // Make sure all exiting blocks produce the same incoming value for the exit + // block. If there are different incoming values for different exiting + // blocks, then it is impossible to statically determine which value should + // be used. + for (unsigned i = 1; i < exitingBlocks.size(); ++i) { + if (incoming != P->getIncomingValueForBlock(exitingBlocks[i])) + return false; + } + + if (Instruction* I = dyn_cast<Instruction>(incoming)) + if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) + return false; + + ++BI; + } + + // Make sure that no instructions in the block have potential side-effects. + // This includes instructions that could write to memory, and loads that are + // marked volatile. This could be made more aggressive by using aliasing + // information to identify readonly and readnone calls. + for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) { + for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); + BI != BE; ++BI) { + if (BI->mayHaveSideEffects()) + return false; + } + } + + return true; +} + +/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the +/// observable behavior of the program other than finite running time. Note +/// we do ensure that this never removes a loop that might be infinite, as doing +/// so could change the halting/non-halting nature of a program. +/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA +/// in order to make various safety checks work. +bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { + // We can only remove the loop if there is a preheader that we can + // branch from after removing it. 
+ BasicBlock* preheader = L->getLoopPreheader(); + if (!preheader) + return false; + + // If LoopSimplify form is not available, stay out of trouble. + if (!L->hasDedicatedExits()) + return false; + + // We can't remove loops that contain subloops. If the subloops were dead, + // they would already have been removed in earlier executions of this pass. + if (L->begin() != L->end()) + return false; + + SmallVector<BasicBlock*, 4> exitingBlocks; + L->getExitingBlocks(exitingBlocks); + + SmallVector<BasicBlock*, 4> exitBlocks; + L->getUniqueExitBlocks(exitBlocks); + + // We require that the loop only have a single exit block. Otherwise, we'd + // be in the situation of needing to be able to solve statically which exit + // block will be branched to, or trying to preserve the branching logic in + // a loop invariant manner. + if (exitBlocks.size() != 1) + return false; + + // Finally, we have to check that the loop really is dead. + bool Changed = false; + if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) + return Changed; + + // Don't remove loops for which we can't solve the trip count. + // They could be infinite, in which case we'd be changing program behavior. + ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); + const SCEV *S = SE.getMaxBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(S)) + return Changed; + + // Now that we know the removal is safe, remove the loop by changing the + // branch from the preheader to go to the single exit block. + BasicBlock* exitBlock = exitBlocks[0]; + + // Because we're deleting a large chunk of code at once, the sequence in which + // we remove things is very important to avoid invalidation issues. Don't + // mess with this unless you have good reason and know what you're doing. + + // Tell ScalarEvolution that the loop is deleted. Do this before + // deleting the loop so that ScalarEvolution can look at the loop + // to determine what it needs to clean up. 
+ SE.forgetLoop(L); + + // Connect the preheader directly to the exit block. + TerminatorInst* TI = preheader->getTerminator(); + TI->replaceUsesOfWith(L->getHeader(), exitBlock); + + // Rewrite phis in the exit block to get their inputs from + // the preheader instead of the exiting block. + BasicBlock* exitingBlock = exitingBlocks[0]; + BasicBlock::iterator BI = exitBlock->begin(); + while (PHINode* P = dyn_cast<PHINode>(BI)) { + int j = P->getBasicBlockIndex(exitingBlock); + assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); + P->setIncomingBlock(j, preheader); + for (unsigned i = 1; i < exitingBlocks.size(); ++i) + P->removeIncomingValue(exitingBlocks[i]); + ++BI; + } + + // Update the dominator tree and remove the instructions and blocks that will + // be deleted from the reference counting scheme. + DominatorTree& DT = getAnalysis<DominatorTree>(); + SmallVector<DomTreeNode*, 8> ChildNodes; + for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) { + // Move all of the block's children to be children of the preheader, which + // allows us to remove the domtree entry for the block. + ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end()); + for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(), + DE = ChildNodes.end(); DI != DE; ++DI) { + DT.changeImmediateDominator(*DI, DT[preheader]); + } + + ChildNodes.clear(); + DT.eraseNode(*LI); + + // Remove the block from the reference counting scheme, so that we can + // delete it freely later. + (*LI)->dropAllReferences(); + } + + // Erase the instructions and the blocks without having to worry + // about ordering because we already dropped the references. + // NOTE: This iteration is safe because erasing the block does not remove its + // entry from the loop's block list. We do that in the next section. 
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) + (*LI)->eraseFromParent(); + + // Finally, remove the blocks from loopinfo. This has to happen late because + // otherwise our loop iterators won't work. + LoopInfo& loopInfo = getAnalysis<LoopInfo>(); + SmallPtrSet<BasicBlock*, 8> blocks; + blocks.insert(L->block_begin(), L->block_end()); + for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(), + E = blocks.end(); I != E; ++I) + loopInfo.removeBlock(*I); + + // The last step is to inform the loop pass manager that we've + // eliminated this loop. + LPM.deleteLoopFromQueue(L); + Changed = true; + + ++NumDeleted; + + return Changed; +}
diff --git a/src/LLVM/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/src/LLVM/lib/Transforms/Scalar/LoopIdiomRecognize.cpp new file mode 100644 index 0000000..ad15cbb --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -0,0 +1,634 @@ +//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements an idiom recognizer that transforms simple loops into a +// non-loop form. In cases that this kicks in, it can be a significant +// performance win. +// +//===----------------------------------------------------------------------===// +// +// TODO List: +// +// Future loop memory idioms to recognize: +// memcmp, memmove, strlen, etc. +// Future floating point idioms to recognize in -ffast-math mode: +// fpowi +// Future integer operation idioms to recognize: +// ctpop, ctlz, cttz +// +// Beware that isel's default lowering for ctpop is highly inefficient for +// i64 and larger types when i64 is legal and the value has few bits set. It +// would be good to enhance isel to emit a loop for ctpop in this case. +// +// We should enhance the memset/memcpy recognition to handle multiple stores in +// the loop. This would handle things like: +// void foo(_Complex float *P) +// for (i) { __real__(*P) = 0; __imag__(*P) = 0; } +// +// We should enhance this to handle negative strides through memory. +// Alternatively (and perhaps better) we could rely on an earlier pass to force +// forward iteration through memory, which is generally better for cache +// behavior. Negative strides *do* happen for memset/memcpy loops. +// +// This could recognize common matrix multiplies and dot product idioms and +// replace them with calls to BLAS (if linked in??). 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-idiom" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); +STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); + +namespace { + class LoopIdiomRecognize : public LoopPass { + Loop *CurLoop; + const TargetData *TD; + DominatorTree *DT; + ScalarEvolution *SE; + TargetLibraryInfo *TLI; + public: + static char ID; + explicit LoopIdiomRecognize() : LoopPass(ID) { + initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM); + bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, + SmallVectorImpl<BasicBlock*> &ExitBlocks); + + bool processLoopStore(StoreInst *SI, const SCEV *BECount); + bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); + + bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, + unsigned StoreAlignment, + Value *SplatValue, Instruction *TheStore, + const SCEVAddRecExpr *Ev, + const SCEV *BECount); + bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, + const SCEVAddRecExpr *StoreEv, + const SCEVAddRecExpr *LoadEv, + const SCEV *BECount); + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. 
+ /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<ScalarEvolution>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<DominatorTree>(); + AU.addRequired<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); + } + }; +} + +char LoopIdiomRecognize::ID = 0; +INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", + false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", + false, false) + +Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } + +/// deleteDeadInstruction - Delete this instruction. Before we do, go through +/// and zero out all the operands of this instruction. If any of them become +/// dead, delete them and the computation tree that feeds them. +/// +static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { + SmallVector<Instruction*, 32> NowDeadInsts; + + NowDeadInsts.push_back(I); + + // Before we touch this instruction, remove it from SE! + do { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); + + // This instruction is dead, zap it, in stages. Start by removing it from + // SCEV. + SE.forgetValue(DeadInst); + + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { + Value *Op = DeadInst->getOperand(op); + DeadInst->setOperand(op, 0); + + // If this operand just became dead, add it to the NowDeadInsts list. 
+ if (!Op->use_empty()) continue; + + if (Instruction *OpI = dyn_cast<Instruction>(Op)) + if (isInstructionTriviallyDead(OpI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + + } while (!NowDeadInsts.empty()); +} + +/// deleteIfDeadInstruction - If the specified value is a dead instruction, +/// delete it and any recursively used instructions. +static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (isInstructionTriviallyDead(I)) + deleteDeadInstruction(I, SE); +} + +bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { + CurLoop = L; + + // Disable loop idiom recognition if the function's name is a common idiom. + StringRef Name = L->getHeader()->getParent()->getName(); + if (Name == "memset" || Name == "memcpy") + return false; + + // The trip count of the loop must be analyzable. + SE = &getAnalysis<ScalarEvolution>(); + if (!SE->hasLoopInvariantBackedgeTakenCount(L)) + return false; + const SCEV *BECount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BECount)) return false; + + // If this loop executes exactly one time, then it should be peeled, not + // optimized by this pass. + if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) + if (BECst->getValue()->getValue() == 0) + return false; + + // We require target data for now. + TD = getAnalysisIfAvailable<TargetData>(); + if (TD == 0) return false; + + DT = &getAnalysis<DominatorTree>(); + LoopInfo &LI = getAnalysis<LoopInfo>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + + SmallVector<BasicBlock*, 8> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + + DEBUG(dbgs() << "loop-idiom Scanning: F[" + << L->getHeader()->getParent()->getName() + << "] Loop %" << L->getHeader()->getName() << "\n"); + + bool MadeChange = false; + // Scan all the blocks in the loop that are not in subloops. 
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; + ++BI) { + // Ignore blocks in subloops. + if (LI.getLoopFor(*BI) != CurLoop) + continue; + + MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks); + } + return MadeChange; +} + +/// runOnLoopBlock - Process the specified block, which lives in a counted loop +/// with the specified backedge count. This block is known to be in the current +/// loop and not in any subloops. +bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, + SmallVectorImpl<BasicBlock*> &ExitBlocks) { + // We can only promote stores in this block if they are unconditionally + // executed in the loop. For a block to be unconditionally executed, it has + // to dominate all the exit blocks of the loop. Verify this now. + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (!DT->dominates(BB, ExitBlocks[i])) + return false; + + bool MadeChange = false; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = I++; + // Look for store instructions, which may be optimized to memset/memcpy. + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + WeakVH InstPtr(I); + if (!processLoopStore(SI, BECount)) continue; + MadeChange = true; + + // If processing the store invalidated our iterator, start over from the + // top of the block. + if (InstPtr == 0) + I = BB->begin(); + continue; + } + + // Look for memset instructions, which may be optimized to a larger memset. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { + WeakVH InstPtr(I); + if (!processLoopMemSet(MSI, BECount)) continue; + MadeChange = true; + + // If processing the memset invalidated our iterator, start over from the + // top of the block. + if (InstPtr == 0) + I = BB->begin(); + continue; + } + } + + return MadeChange; +} + + +/// processLoopStore - See if this store can be promoted to a memset or memcpy. 
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { + if (!SI->isSimple()) return false; + + Value *StoredVal = SI->getValueOperand(); + Value *StorePtr = SI->getPointerOperand(); + + // Reject stores that are so large that they overflow an unsigned. + uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); + if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) + return false; + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided store. If we have something else, it's a + // random store we can't handle. + const SCEVAddRecExpr *StoreEv = + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); + if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) + return false; + + // Check to see if the stride matches the size of the store. If so, then we + // know that every byte is touched in the loop. + unsigned StoreSize = (unsigned)SizeInBits >> 3; + const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); + + if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { + // TODO: Could also handle negative stride here someday, that will require + // the validity check in mayLoopAccessLocation to be updated though. + // Enable this to print exact negative strides. + if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) { + dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; + dbgs() << "BB: " << *SI->getParent(); + } + + return false; + } + + // See if we can optimize just this store in isolation. + if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), + StoredVal, SI, StoreEv, BECount)) + return true; + + // If the stored value is a strided load in the same loop with the same stride + // then this may be transformable into a memcpy.
 This kicks in for stuff like + // for (i) A[i] = B[i]; + if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { + const SCEVAddRecExpr *LoadEv = + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0))); + if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() && + StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple()) + if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount)) + return true; + } + //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n"; + + return false; +} + +/// processLoopMemSet - See if this memset can be promoted to a large memset. +bool LoopIdiomRecognize:: +processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { + // We can only handle non-volatile memsets with a constant size. + if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false; + + // If we're not allowed to hack on memset, we fail. + if (!TLI->has(LibFunc::memset)) + return false; + + Value *Pointer = MSI->getDest(); + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided store. If we have something else, it's a + // random store we can't handle. + const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer)); + if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine()) + return false; + + // Reject memsets that are so large that they overflow an unsigned. + uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); + if ((SizeInBytes >> 32) != 0) + return false; + + // Check to see if the stride matches the size of the memset. If so, then we + // know that every byte is touched in the loop. + const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); + + // TODO: Could also handle negative stride here someday, that will require the + // validity check in mayLoopAccessLocation to be updated though. 
+ if (Stride == 0 || MSI->getLength() != Stride->getValue()) + return false; + + return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, + MSI->getAlignment(), MSI->getValue(), + MSI, Ev, BECount); +} + + +/// mayLoopAccessLocation - Return true if the specified loop might access the +/// specified pointer location, which is a loop-strided access. The 'Access' +/// argument specifies what the verboten forms of access are (read or write). +static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, + Loop *L, const SCEV *BECount, + unsigned StoreSize, AliasAnalysis &AA, + Instruction *IgnoredStore) { + // Get the location that may be stored across the loop. Since the access is + // strided positively through memory, we say that the modified location starts + // at the pointer and has infinite size. + uint64_t AccessSize = AliasAnalysis::UnknownSize; + + // If the loop iterates a fixed number of times, we can refine the access size + // to be exactly the size of the memset, which is (BECount+1)*StoreSize + if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) + AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; + + // TODO: For this to be really effective, we have to dive into the pointer + // operand in the store. Store to &A[i] of 100 will always return may alias + // with store of &A[100], we need StoreLoc to be "A" with size of 100, + // which will then no-alias a store to &A[100]. + AliasAnalysis::Location StoreLoc(Ptr, AccessSize); + + for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; + ++BI) + for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) + if (&*I != IgnoredStore && + (AA.getModRefInfo(I, StoreLoc) & Access)) + return true; + + return false; +} + +/// getMemSetPatternValue - If a strided store of the specified value is safe to +/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should +/// be passed in. Otherwise, return null. 
+/// +/// Note that we don't ever attempt to use memset_pattern8 or 4, because these +/// just replicate their input array and then pass on to memset_pattern16. +static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { + // If the value isn't a constant, we can't promote it to being in a constant + // array. We could theoretically do a store to an alloca or something, but + // that doesn't seem worthwhile. + Constant *C = dyn_cast<Constant>(V); + if (C == 0) return 0; + + // Only handle simple values that are a power of two bytes in size. + uint64_t Size = TD.getTypeSizeInBits(V->getType()); + if (Size == 0 || (Size & 7) || (Size & (Size-1))) + return 0; + + // Don't care enough about darwin/ppc to implement this. + if (TD.isBigEndian()) + return 0; + + // Convert to size in bytes. + Size /= 8; + + // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see + // if the top and bottom are the same (e.g. for vectors and large integers). + if (Size > 16) return 0; + + // If the constant is exactly 16 bytes, just use it. + if (Size == 16) return C; + + // Otherwise, we'll use an array of the constants. + unsigned ArraySize = 16/Size; + ArrayType *AT = ArrayType::get(V->getType(), ArraySize); + return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C)); +} + + +/// processLoopStridedStore - We see a strided store of some value. If we can +/// transform this into a memset or memset_pattern in the loop preheader, do so. +bool LoopIdiomRecognize:: +processLoopStridedStore(Value *DestPtr, unsigned StoreSize, + unsigned StoreAlignment, Value *StoredVal, + Instruction *TheStore, const SCEVAddRecExpr *Ev, + const SCEV *BECount) { + + // If the stored value is a byte-wise value (like i32 -1), then it may be + // turned into a memset of i8 -1, assuming that all the consecutive bytes + // are stored. A store of i32 0x01020304 can never be turned into a memset, + // but it can be turned into memset_pattern if the target supports it. 
+ Value *SplatValue = isBytewiseValue(StoredVal); + Constant *PatternValue = 0; + + // If we're allowed to form a memset, and the stored value would be acceptable + // for memset, use it. + if (SplatValue && TLI->has(LibFunc::memset) && + // Verify that the stored value is loop invariant. If not, we can't + // promote the memset. + CurLoop->isLoopInvariant(SplatValue)) { + // Keep and use SplatValue. + PatternValue = 0; + } else if (TLI->has(LibFunc::memset_pattern16) && + (PatternValue = getMemSetPatternValue(StoredVal, *TD))) { + // It looks like we can use PatternValue! + SplatValue = 0; + } else { + // Otherwise, this isn't an idiom we can transform. For example, we can't + // do anything with a 3-byte store. + return false; + } + + // The trip count of the loop and the base pointer of the addrec SCEV is + // guaranteed to be loop invariant, which means that it should dominate the + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + SCEVExpander Expander(*SE, "loop-idiom"); + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn + // this into a memset in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write to the aliased location. Check for any overlap by generating the + // base pointer and checking the region. + unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); + Value *BasePtr = + Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), + Preheader->getTerminator()); + + + if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, + CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ + Expander.clear(); + // If we generated new code for the base pointer, clean up. 
+ deleteIfDeadInstruction(BasePtr, *SE); + return false; + } + + // Okay, everything looks good, insert the memset. + + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to + // pointer size if it isn't already. + Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); + BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); + + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), + SCEV::FlagNUW); + if (StoreSize != 1) + NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), + SCEV::FlagNUW); + + Value *NumBytes = + Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); + + CallInst *NewCall; + if (SplatValue) + NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment); + else { + Module *M = TheStore->getParent()->getParent()->getParent(); + Value *MSP = M->getOrInsertFunction("memset_pattern16", + Builder.getVoidTy(), + Builder.getInt8PtrTy(), + Builder.getInt8PtrTy(), IntPtr, + (void*)0); + + // Otherwise we should form a memset_pattern16. PatternValue is known to be + // a constant array of 16 bytes. Plop the value into a mergeable global. + GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, + GlobalValue::InternalLinkage, + PatternValue, ".memset_pattern"); + GV->setUnnamedAddr(true); // Ok to merge these. + GV->setAlignment(16); + Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); + NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); + } + + DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" + << " from store to: " << *Ev << " at: " << *TheStore << "\n"); + NewCall->setDebugLoc(TheStore->getDebugLoc()); + + // Okay, the memset has been formed. Zap the original store and anything that + // feeds into it. + deleteDeadInstruction(TheStore, *SE); + ++NumMemSet; + return true; +} + +/// processLoopStoreOfLoopLoad - We see a strided store whose value is a +/// same-strided load. 
+bool LoopIdiomRecognize:: +processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, + const SCEVAddRecExpr *StoreEv, + const SCEVAddRecExpr *LoadEv, + const SCEV *BECount) { + // If we're not allowed to form memcpy, we fail. + if (!TLI->has(LibFunc::memcpy)) + return false; + + LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); + + // The trip count of the loop and the base pointer of the addrec SCEV is + // guaranteed to be loop invariant, which means that it should dominate the + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + SCEVExpander Expander(*SE, "loop-idiom"); + + // Okay, we have a strided store "p[i]" of a loaded value. We can turn + // this into a memcpy in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write the memory region we're storing to. This includes the load that + // feeds the stores. Check for an alias by generating the base address and + // checking everything. + Value *StoreBasePtr = + Expander.expandCodeFor(StoreEv->getStart(), + Builder.getInt8PtrTy(SI->getPointerAddressSpace()), + Preheader->getTerminator()); + + if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, + CurLoop, BECount, StoreSize, + getAnalysis<AliasAnalysis>(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(StoreBasePtr, *SE); + return false; + } + + // For a memcpy, we have to make sure that the input array is not being + // mutated by the loop. 
+ Value *LoadBasePtr = + Expander.expandCodeFor(LoadEv->getStart(), + Builder.getInt8PtrTy(LI->getPointerAddressSpace()), + Preheader->getTerminator()); + + if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(LoadBasePtr, *SE); + deleteIfDeadInstruction(StoreBasePtr, *SE); + return false; + } + + // Okay, everything is safe, we can transform this! + + + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to + // pointer size if it isn't already. + Type *IntPtr = TD->getIntPtrType(SI->getContext()); + BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); + + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), + SCEV::FlagNUW); + if (StoreSize != 1) + NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), + SCEV::FlagNUW); + + Value *NumBytes = + Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); + + CallInst *NewCall = + Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, + std::min(SI->getAlignment(), LI->getAlignment())); + NewCall->setDebugLoc(SI->getDebugLoc()); + + DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" + << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" + << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); + + + // Okay, the memcpy has been formed. Zap the original store and anything that + // feeds into it. + deleteDeadInstruction(SI, *SE); + ++NumMemCpy; + return true; +}
diff --git a/src/LLVM/lib/Transforms/Scalar/LoopInstSimplify.cpp b/src/LLVM/lib/Transforms/Scalar/LoopInstSimplify.cpp new file mode 100644 index 0000000..af25c5c --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -0,0 +1,183 @@
+//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs lightweight instruction simplification on loop bodies.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-instsimplify"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions simplified");
+
+namespace {
+  class LoopInstSimplify : public LoopPass {
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    LoopInstSimplify() : LoopPass(ID) {
+      initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnLoop(Loop*, LPPassManager&);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<LoopInfo>();
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addPreservedID(LCSSAID);
+      AU.addPreserved("scalar-evolution");
+    }
+  };
+}
+
+char LoopInstSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
+                "Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
+                "Simplify instructions in loops", false, false)
+
+Pass *llvm::createLoopInstSimplifyPass() {
+  return new LoopInstSimplify();
+}
+
+/// runOnLoop - Simplify the instructions of L, visiting its blocks and only
+/// the phis of its subloop headers, repeating until a full sweep changes
+/// nothing. Returns true if any instruction was replaced or deleted.
+bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+  DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+  LoopInfo *LI = &getAnalysis<LoopInfo>();
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getUniqueExitBlocks(ExitBlocks);
+  array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());
+
+  SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+
+  // The bit we are stealing from the pointer represents whether this basic
+  // block is the header of a subloop, in which case we only process its phis.
+  typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
+  SmallVector<WorklistItem, 16> VisitStack;
+  SmallPtrSet<BasicBlock*, 32> Visited;
+
+  bool Changed = false;
+  bool LocalChanged;
+  do {
+    LocalChanged = false;
+
+    VisitStack.clear();
+    Visited.clear();
+
+    VisitStack.push_back(WorklistItem(L->getHeader(), false));
+
+    while (!VisitStack.empty()) {
+      WorklistItem Item = VisitStack.pop_back_val();
+      BasicBlock *BB = Item.getPointer();
+      bool IsSubloopHeader = Item.getInt();
+
+      // Simplify instructions in the current basic block.
+      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+        Instruction *I = BI++;
+
+        // The first time through the loop ToSimplify is empty and we try to
+        // simplify all instructions. On later iterations ToSimplify is not
+        // empty and we only bother simplifying instructions that are in it.
+        if (!ToSimplify->empty() && !ToSimplify->count(I))
+          continue;
+
+        // Don't bother simplifying unused instructions.
+        if (!I->use_empty()) {
+          Value *V = SimplifyInstruction(I, TD, DT);
+          if (V && LI->replacementPreservesLCSSAForm(I, V)) {
+            // Mark all uses for resimplification next time round the loop.
+            for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+                 UI != UE; ++UI)
+              Next->insert(cast<Instruction>(*UI));
+
+            I->replaceAllUsesWith(V);
+            LocalChanged = true;
+            ++NumSimplified;
+          }
+        }
+        // Classify I now: the call below may delete it, after which it must
+        // not be inspected.
+        bool IsPHI = isa<PHINode>(I);
+        if (RecursivelyDeleteTriviallyDeadInstructions(I)) {
+          // The recursive deletion can erase more than just I (e.g. a dead
+          // phi's loop-carried operand defined later in this block), so the
+          // pre-incremented iterator may be dangling. Rescan the block.
+          BI = BB->begin();
+          BE = BB->end();
+          LocalChanged = true;
+        }
+
+        if (IsSubloopHeader && !IsPHI)
+          break;
+      }
+
+      // Add all successors to the worklist, except for loop exit blocks and the
+      // bodies of subloops. We visit the headers of loops so that we can process
+      // their phis, but we contract the rest of the subloop body and only follow
+      // edges leading back to the original loop.
+      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+           ++SI) {
+        BasicBlock *SuccBB = *SI;
+        if (!Visited.insert(SuccBB))
+          continue;
+
+        const Loop *SuccLoop = LI->getLoopFor(SuccBB);
+        if (SuccLoop && SuccLoop->getHeader() == SuccBB
+                     && L->contains(SuccLoop)) {
+          VisitStack.push_back(WorklistItem(SuccBB, true));
+
+          SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
+          SuccLoop->getExitBlocks(SubLoopExitBlocks);
+
+          for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
+            BasicBlock *ExitBB = SubLoopExitBlocks[i];
+            if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
+              VisitStack.push_back(WorklistItem(ExitBB, false));
+          }
+
+          continue;
+        }
+
+        bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
+                                              ExitBlocks.end(), SuccBB);
+        if (IsExitBlock)
+          continue;
+
+        VisitStack.push_back(WorklistItem(SuccBB, false));
+      }
+    }
+
+    // Place the list of instructions to simplify on the next loop iteration
+    // into ToSimplify.
+    std::swap(ToSimplify, Next);
+    Next->clear();
+
+    Changed |= LocalChanged;
+  } while (LocalChanged);
+
+  return Changed;
+}
diff --git a/src/LLVM/lib/Transforms/Scalar/LoopRotation.cpp b/src/LLVM/lib/Transforms/Scalar/LoopRotation.cpp new file mode 100644 index 0000000..9fd0958 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/LoopRotation.cpp
@@ -0,0 +1,353 @@
+//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Loop Rotation Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-rotate"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+#define MAX_HEADER_SIZE 16
+
+STATISTIC(NumRotated, "Number of loops rotated");
+namespace {
+
+  class LoopRotate : public LoopPass {
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    LoopRotate() : LoopPass(ID) {
+      initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+    }
+
+    // LCSSA form makes instruction renaming easier.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<DominatorTree>();
+      AU.addRequired<LoopInfo>();
+      AU.addPreserved<LoopInfo>();
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addRequiredID(LCSSAID);
+      AU.addPreservedID(LCSSAID);
+      AU.addPreserved<ScalarEvolution>();
+    }
+
+    bool runOnLoop(Loop *L, LPPassManager &LPM);
+    bool rotateLoop(Loop *L);
+
+  private:
+    LoopInfo *LI;
+  };
+}
+
+char LoopRotate::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+
+Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+
+/// Rotate Loop L as many times as possible. Return true if
+/// the loop is rotated at least once.
+bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+  LI = &getAnalysis<LoopInfo>();
+
+  // One loop can be rotated multiple times.
+  bool MadeChange = false;
+  while (rotateLoop(L))
+    MadeChange = true;
+
+  return MadeChange;
+}
+
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instructions that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+                                            BasicBlock *OrigPreheader,
+                                            ValueToValueMapTy &ValueMap) {
+  // Remove PHI node entries that are no longer live.
+  BasicBlock::iterator I, E = OrigHeader->end();
+  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+  // as necessary.
+  SSAUpdater SSA;
+  for (I = OrigHeader->begin(); I != E; ++I) {
+    Value *OrigHeaderVal = I;
+
+    // If there are no uses of the value (e.g. because it returns void), there
+    // is nothing to rewrite.
+    if (OrigHeaderVal->use_empty())
+      continue;
+
+    Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
+
+    // The value now exists in two versions: the initial value in the preheader
+    // and the loop "next" value in the original header.
+    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+    SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+    // Visit each use of the OrigHeader instruction.
+    for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+         UE = OrigHeaderVal->use_end(); UI != UE; ) {
+      // Grab the use before incrementing the iterator.
+      Use &U = UI.getUse();
+
+      // Increment the iterator before removing the use from the list.
+      ++UI;
+
+      // SSAUpdater can't handle a non-PHI use in the same block as an
+      // earlier def. We can easily handle those cases manually.
+      Instruction *UserInst = cast<Instruction>(U.getUser());
+      if (!isa<PHINode>(UserInst)) {
+        BasicBlock *UserBB = UserInst->getParent();
+
+        // The original users in the OrigHeader are already using the
+        // original definitions.
+        if (UserBB == OrigHeader)
+          continue;
+
+        // Users in the OrigPreHeader need to use the value to which the
+        // original definitions are mapped.
+        if (UserBB == OrigPreheader) {
+          U = OrigPreHeaderVal;
+          continue;
+        }
+      }
+
+      // Anything else can be handled by SSAUpdater.
+      SSA.RewriteUse(U);
+    }
+  }
+}
+
+/// Rotate loop L. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *L) {
+  // If the loop has only one block then there is not much to rotate.
+  if (L->getBlocks().size() == 1)
+    return false;
+
+  BasicBlock *OrigHeader = L->getHeader();
+
+  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+  if (BI == 0 || BI->isUnconditional())
+    return false;
+
+  // If the loop header is not one of the loop exiting blocks then
+  // either this loop is already rotated or it is not
+  // suitable for loop rotation transformations.
+  if (!L->isLoopExiting(OrigHeader))
+    return false;
+
+  // Updating PHInodes in loops with multiple exits adds complexity.
+  // Keep it simple, and restrict loop rotation to loops with one exit only.
+  // In future, lift this restriction and support for multiple exits if
+  // required.
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  if (ExitBlocks.size() > 1)
+    return false;
+
+  // Check size of original header and reject loop if it is very big.
+  {
+    CodeMetrics Metrics;
+    Metrics.analyzeBasicBlock(OrigHeader);
+    if (Metrics.NumInsts > MAX_HEADER_SIZE)
+      return false;
+  }
+
+  // Now, this loop is suitable for rotation.
+  BasicBlock *OrigPreheader = L->getLoopPreheader();
+  BasicBlock *OrigLatch = L->getLoopLatch();
+
+  // If the loop could not be converted to canonical form, it must have an
+  // indirectbr in it, just give up.
+  if (OrigPreheader == 0 || OrigLatch == 0)
+    return false;
+
+  // Anything ScalarEvolution may know about this loop or the PHI nodes
+  // in its header will soon be invalidated.
+  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+    SE->forgetLoop(L);
+
+  // Find new Loop header. NewHeader is a Header's one and only successor
+  // that is inside loop. Header's other successor is outside the
+  // loop. Otherwise loop is not suitable for rotation.
+  BasicBlock *Exit = BI->getSuccessor(0);
+  BasicBlock *NewHeader = BI->getSuccessor(1);
+  if (L->contains(Exit))
+    std::swap(Exit, NewHeader);
+  assert(NewHeader && "Unable to determine new loop header");
+  assert(L->contains(NewHeader) && !L->contains(Exit) &&
+         "Unable to determine loop header and exit blocks");
+
+  // This code assumes that the new header has exactly one predecessor.
+  // Remove any single-entry PHI nodes in it.
+  assert(NewHeader->getSinglePredecessor() &&
+         "New header doesn't have one pred!");
+  FoldSingleEntryPHINodes(NewHeader);
+
+  // Begin by walking OrigHeader and populating ValueMap with an entry for
+  // each Instruction.
+  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
+  ValueToValueMapTy ValueMap;
+
+  // For PHI nodes, the value available in OldPreHeader is just the
+  // incoming value from OldPreHeader.
+  for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+
+  // For the rest of the instructions, either hoist to the OrigPreheader if
+  // possible or create a clone in the OldPreHeader if not.
+  TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+  while (I != E) {
+    Instruction *Inst = I++;
+
+    // If the instruction's operands are invariant and it doesn't read or write
+    // memory, then it is safe to hoist. Doing this doesn't change the order of
+    // execution in the preheader, but does prevent the instruction from
+    // executing in each iteration of the loop. This means it is safe to hoist
+    // something that might trap, but isn't safe to hoist something that reads
+    // memory (without proving that the loop doesn't write).
+    if (L->hasLoopInvariantOperands(Inst) &&
+        !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+        !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
+      Inst->moveBefore(LoopEntryBranch);
+      continue;
+    }
+
+    // Otherwise, create a duplicate of the instruction.
+    Instruction *C = Inst->clone();
+
+    // Eagerly remap the operands of the instruction.
+    RemapInstruction(C, ValueMap,
+                     RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+    // With the operands remapped, see if the instruction constant folds or is
+    // otherwise simplifiable. This commonly occurs because the entry from PHI
+    // nodes allows icmps and other instructions to fold.
+    Value *V = SimplifyInstruction(C);
+    if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+      // If so, then delete the temporary instruction and stick the folded value
+      // in the map.
+      delete C;
+      ValueMap[Inst] = V;
+    } else {
+      // Otherwise, stick the new instruction into the new block!
+      C->setName(Inst->getName());
+      C->insertBefore(LoopEntryBranch);
+      ValueMap[Inst] = C;
+    }
+  }
+
+  // Along with all the other instructions, we just cloned OrigHeader's
+  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+  // successors by duplicating their incoming values for OrigHeader.
+  TerminatorInst *TI = OrigHeader->getTerminator();
+  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+    for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
+         PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
+
+  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
+  // OrigPreHeader's old terminator (the original branch into the loop), and
+  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
+  LoopEntryBranch->eraseFromParent();
+
+  // If there were any uses of instructions in the duplicated block outside the
+  // loop, update them, inserting PHI nodes as required.
+  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
+
+  // NewHeader is now the header of the loop.
+  L->moveToHeader(NewHeader);
+  assert(L->getHeader() == NewHeader && "Latch block is our new header");
+
+
+  // At this point, we've finished our major CFG changes. As part of cloning
+  // the loop into the preheader we've simplified instructions and the
+  // duplicated conditional branch may now be branching on a constant. If it is
+  // branching on a constant and if that constant means that we enter the loop,
+  // then we fold away the cond branch to an uncond branch. This simplifies the
+  // loop in cases important for nested loops, and it also means we don't have
+  // to split as many edges.
+  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+  if (!isa<ConstantInt>(PHBI->getCondition()) ||
+      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
+          != NewHeader) {
+    // The conditional branch can't be folded, handle the general case.
+    // Update DominatorTree to reflect the CFG change we just made. Then split
+    // edges as necessary to preserve LoopSimplify form.
+    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+      // Since OrigPreheader now has the conditional branch to Exit block, it is
+      // the dominator of Exit.
+      DT->changeImmediateDominator(Exit, OrigPreheader);
+      DT->changeImmediateDominator(NewHeader, OrigPreheader);
+
+      // Update OrigHeader to be dominated by the original latch block.
+      DT->changeImmediateDominator(OrigHeader, OrigLatch);
+    }
+
+    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+    // thus is not a preheader anymore. Split the edge to form a real preheader.
+    BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+    NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+    // Preserve canonical loop form, which means that 'Exit' should have only one
+    // predecessor.
+    BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
+    ExitSplit->moveBefore(Exit);
+  } else {
+    // We can fold the conditional branch in the preheader, this makes things
+    // simpler. The first step is to remove the extra edge to the Exit block.
+    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+    NewBI->setDebugLoc(PHBI->getDebugLoc());
+    PHBI->eraseFromParent();
+
+    // With our CFG finalized, update DomTree if it is available.
+    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+      // NewHeader is now entered from OrigPreheader; OrigHeader follows the latch.
+      DT->changeImmediateDominator(NewHeader, OrigPreheader);
+      DT->changeImmediateDominator(OrigHeader, OrigLatch);
+    }
+  }
+
+  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+  // Now that the CFG and DomTree are in a consistent state again, try to merge
+  // the OrigHeader block into OrigLatch. This will succeed if they are
+  // connected by an unconditional branch. This is just a cleanup so the
+  // emitted code isn't too gross in this common case.
+  MergeBlockIntoPredecessor(OrigHeader, this);
+
+  ++NumRotated;
+  return true;
+}
+
diff --git a/src/LLVM/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/src/LLVM/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4e343e3..3e122c2 100644 --- a/src/LLVM/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/src/LLVM/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -63,18 +63,34 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; +namespace llvm { +cl::opt<bool> EnableNested( + "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops")); + +cl::opt<bool> EnableRetry( + "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); + +// Temporary flag to cleanup congruent phis after LSR phi expansion. +// It's currently disabled until we can determine whether it's truly useful or +// not. The flag should be removed after the v3.0 release. +cl::opt<bool> EnablePhiElim( + "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination")); +} + namespace { /// RegSortData - This class holds data which is used to order reuse candidates. @@ -113,7 +129,7 @@ public: void CountRegister(const SCEV *Reg, size_t LUIdx); void DropRegister(const SCEV *Reg, size_t LUIdx); - void DropUse(size_t LUIdx); + void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx); bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; @@ -152,18 +168,27 @@ } void -RegUseTracker::DropUse(size_t LUIdx) { - // Remove the use index from every register's use list. +RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) { + assert(LUIdx <= LastLUIdx); + + // Update RegUses. The data structure is not optimized for this purpose; + // we must iterate through it and update each of the bit vectors. 
for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end(); - I != E; ++I) - I->second.UsedByIndices.reset(LUIdx); + I != E; ++I) { + SmallBitVector &UsedByIndices = I->second.UsedByIndices; + if (LUIdx < UsedByIndices.size()) + UsedByIndices[LUIdx] = + LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0; + UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx)); + } } bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { - if (!RegUsesMap.count(Reg)) return false; - const SmallBitVector &UsedByIndices = - RegUsesMap.find(Reg)->second.UsedByIndices; + RegUsesTy::const_iterator I = RegUsesMap.find(Reg); + if (I == RegUsesMap.end()) + return false; + const SmallBitVector &UsedByIndices = I->second.UsedByIndices; int i = UsedByIndices.find_first(); if (i == -1) return false; if ((size_t)i != LUIdx) return true; @@ -199,13 +224,17 @@ /// when AM.Scale is not zero. const SCEV *ScaledReg; - Formula() : ScaledReg(0) {} + /// UnfoldedOffset - An additional constant offset which added near the + /// use. This requires a temporary register, but the offset itself can + /// live in an add immediate field rather than a register. + int64_t UnfoldedOffset; - void InitialMatch(const SCEV *S, Loop *L, - ScalarEvolution &SE, DominatorTree &DT); + Formula() : ScaledReg(0), UnfoldedOffset(0) {} + + void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); unsigned getNumRegs() const; - const Type *getType() const; + Type *getType() const; void DeleteBaseReg(const SCEV *&S); @@ -223,9 +252,9 @@ static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl<const SCEV *> &Good, SmallVectorImpl<const SCEV *> &Bad, - ScalarEvolution &SE, DominatorTree &DT) { + ScalarEvolution &SE) { // Collect expressions which properly dominate the loop header. 
- if (S->properlyDominates(L->getHeader(), &DT)) { + if (SE.properlyDominates(S, L->getHeader())) { Good.push_back(S); return; } @@ -234,18 +263,19 @@ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - DoInitialMatch(*I, L, Good, Bad, SE, DT); + DoInitialMatch(*I, L, Good, Bad, SE); return; } // Look at addrec operands. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) if (!AR->getStart()->isZero()) { - DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); + DoInitialMatch(AR->getStart(), L, Good, Bad, SE); DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), - L, Good, Bad, SE, DT); + // FIXME: AR->getNoWrapFlags() + AR->getLoop(), SCEV::FlagAnyWrap), + L, Good, Bad, SE); return; } @@ -257,7 +287,7 @@ SmallVector<const SCEV *, 4> MyGood; SmallVector<const SCEV *, 4> MyBad; - DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT); + DoInitialMatch(NewMul, L, MyGood, MyBad, SE); const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( SE.getEffectiveSCEVType(NewMul->getType()))); for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(), @@ -277,11 +307,10 @@ /// InitialMatch - Incorporate loop-variant parts of S into this Formula, /// attempting to keep all loop-invariant and loop-computable values in a /// single base register. -void Formula::InitialMatch(const SCEV *S, Loop *L, - ScalarEvolution &SE, DominatorTree &DT) { +void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { SmallVector<const SCEV *, 4> Good; SmallVector<const SCEV *, 4> Bad; - DoInitialMatch(S, L, Good, Bad, SE, DT); + DoInitialMatch(S, L, Good, Bad, SE); if (!Good.empty()) { const SCEV *Sum = SE.getAddExpr(Good); if (!Sum->isZero()) @@ -305,7 +334,7 @@ /// getType - Return the type of this formula, if it has one, or null /// otherwise. This type is meaningless except for the bit size. 
-const Type *Formula::getType() const { +Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : AM.BaseGV ? AM.BaseGV->getType() : @@ -370,6 +399,10 @@ OS << "<unknown>"; OS << ')'; } + if (UnfoldedOffset != 0) { + if (!First) OS << " + "; else First = false; + OS << "imm(" << UnfoldedOffset << ')'; + } } void Formula::dump() const { @@ -379,7 +412,7 @@ /// isAddRecSExtable - Return true if the given addrec can be sign-extended /// without changing its value. static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { - const Type *WideTy = + Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); } @@ -387,7 +420,7 @@ /// isAddSExtable - Return true if the given add can be sign-extended /// without changing its value. static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { - const Type *WideTy = + Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); } @@ -395,7 +428,7 @@ /// isMulSExtable - Return true if the given mul can be sign-extended /// without changing its value. static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { - const Type *WideTy = + Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(M->getType()) * M->getNumOperands()); return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy)); @@ -441,13 +474,16 @@ // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. 
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { - const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, - IgnoreSignificantBits); - if (!Start) return 0; const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, IgnoreSignificantBits); if (!Step) return 0; - return SE.getAddRecExpr(Start, Step, AR->getLoop()); + const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; + // FlagNW is independent of the start value, step direction, and is + // preserved with smaller magnitude steps. + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); } return 0; } @@ -505,12 +541,16 @@ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddExpr(NewOps); + if (Result != 0) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if (Result != 0) + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -528,12 +568,16 @@ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); - S = SE.getAddExpr(NewOps); + if (Result) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if 
(Result) + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -552,9 +596,6 @@ switch (II->getIntrinsicID()) { default: break; case Intrinsic::prefetch: - case Intrinsic::x86_sse2_loadu_dq: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse_loadu_ps: case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: @@ -568,8 +609,8 @@ } /// getAccessType - Return the type of the memory being accessed. -static const Type *getAccessType(const Instruction *Inst) { - const Type *AccessTy = Inst->getType(); +static Type *getAccessType(const Instruction *Inst) { + Type *AccessTy = Inst->getType(); if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) AccessTy = SI->getOperand(0)->getType(); else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -588,7 +629,7 @@ // All pointers have the same requirements, so canonicalize them to an // arbitrary pointer type to minimize variation. - if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy)) + if (PointerType *PTy = dyn_cast<PointerType>(AccessTy)) AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), PTy->getAddressSpace()); @@ -603,7 +644,7 @@ bool Changed = false; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); + Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()); if (I == 0 || !isInstructionTriviallyDead(I)) continue; @@ -640,12 +681,25 @@ : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0), SetupCost(0) {} - unsigned getNumRegs() const { return NumRegs; } - bool operator<(const Cost &Other) const; void Loose(); +#ifndef NDEBUG + // Once any of the metrics loses, they must all remain losers. 
+ bool isValid() { + return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds + | ImmCost | SetupCost) != ~0u) + || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds + & ImmCost & SetupCost) == ~0u); + } +#endif + + bool isLoser() { + assert(isValid() && "invalid cost"); + return NumRegs == ~0u; + } + void RateFormula(const Formula &F, SmallPtrSet<const SCEV *, 16> &Regs, const DenseSet<const SCEV *> &VisitedRegs, @@ -678,34 +732,48 @@ if (AR->getLoop() == L) AddRecCost += 1; /// TODO: This should be a function of the stride. - // If this is an addrec for a loop that's already been visited by LSR, - // don't second-guess its addrec phi nodes. LSR isn't currently smart - // enough to reason about more than one loop at a time. Consider these - // registers free and leave them alone. - else if (L->contains(AR->getLoop()) || + // If this is an addrec for another loop, don't second-guess its addrec phi + // nodes. LSR isn't currently smart enough to reason about more than one + // loop at a time. LSR has either already run on inner loops, will not run + // on other loops, and cannot be expected to change sibling loops. If the + // AddRec exists, consider it's register free and leave it alone. Otherwise, + // do not consider this formula at all. + // FIXME: why do we need to generate such fomulae? + else if (!EnableNested || L->contains(AR->getLoop()) || (!AR->getLoop()->contains(L) && DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHINode *PN = dyn_cast<PHINode>(I); ++I) { if (SE.isSCEVable(PN->getType()) && (SE.getEffectiveSCEVType(PN->getType()) == SE.getEffectiveSCEVType(AR->getType())) && SE.getSCEV(PN) == AR) return; - + } + if (!EnableNested) { + Loose(); + return; + } // If this isn't one of the addrecs that the loop already has, it // would require a costly new phi and add. TODO: This isn't // precisely modeled right now. 
++NumBaseAdds; - if (!Regs.count(AR->getStart())) + if (!Regs.count(AR->getStart())) { RateRegister(AR->getStart(), Regs, L, SE, DT); + if (isLoser()) + return; + } } // Add the step value register, if it needs one. // TODO: The non-affine case isn't precisely modeled here. - if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) - if (!Regs.count(AR->getStart())) + if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) { + if (!Regs.count(AR->getOperand(1))) { RateRegister(AR->getOperand(1), Regs, L, SE, DT); + if (isLoser()) + return; + } + } } ++NumRegs; @@ -717,6 +785,9 @@ (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) || isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart())))) ++SetupCost; + + NumIVMuls += isa<SCEVMulExpr>(Reg) && + SE.hasComputableLoopEvolution(Reg, L); } /// RatePrimaryRegister - Record this register in the set. If we haven't seen it @@ -742,6 +813,8 @@ return; } RatePrimaryRegister(ScaledReg, Regs, L, SE, DT); + if (isLoser()) + return; } for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { @@ -751,13 +824,14 @@ return; } RatePrimaryRegister(BaseReg, Regs, L, SE, DT); - - NumIVMuls += isa<SCEVMulExpr>(BaseReg) && - BaseReg->hasComputableLoopEvolution(L); + if (isLoser()) + return; } - if (F.BaseRegs.size() > 1) - NumBaseAdds += F.BaseRegs.size() - 1; + // Determine how many (unfolded) adds we'll need inside the loop. + size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0); + if (NumBaseParts > 1) + NumBaseAdds += NumBaseParts - 1; // Tally up the non-zero immediates. for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), @@ -769,9 +843,10 @@ else if (Offset != 0) ImmCost += APInt(64, Offset, true).getMinSignedBits(); } + assert(isValid() && "invalid cost"); } -/// Loose - Set this cost to a loosing value. +/// Loose - Set this cost to a losing value. 
void Cost::Loose() { NumRegs = ~0u; AddRecCost = ~0u; @@ -954,7 +1029,7 @@ }; KindType Kind; - const Type *AccessTy; + Type *AccessTy; SmallVector<int64_t, 8> Offsets; int64_t MinOffset; @@ -969,7 +1044,7 @@ /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different /// max fixup widths to be equivalent, because the narrower one may be relying /// on the implicit truncation to truncate away bogus bits. - const Type *WidestFixupType; + Type *WidestFixupType; /// Formulae - A list of ways to build a value that can satisfy this user. /// After the list is populated, one of these is selected heuristically and @@ -979,7 +1054,7 @@ /// Regs - The set of register candidates used by all formulae in this LSRUse. SmallPtrSet<const SCEV *, 4> Regs; - LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T), + LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true), @@ -990,8 +1065,6 @@ void DeleteFormula(Formula &F); void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); - void check() const; - void print(raw_ostream &OS) const; void dump() const; }; @@ -1103,7 +1176,7 @@ /// be completely folded into the user instruction at isel time. This includes /// address-mode folding and special icmp tricks. static bool isLegalUse(const TargetLowering::AddrMode &AM, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI) { switch (Kind) { case LSRUse::Address: @@ -1132,7 +1205,7 @@ // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. 
if (AM.BaseOffs != 0) { - if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs); + if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs); return false; } @@ -1152,7 +1225,7 @@ static bool isLegalUse(TargetLowering::AddrMode AM, int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI) { // Check for overflow. if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != @@ -1174,7 +1247,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs, GlobalValue *BaseGV, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI) { // Fast-path: zero is always foldable. if (BaseOffs == 0 && !BaseGV) return true; @@ -1200,7 +1273,7 @@ static bool isAlwaysFoldable(const SCEV *S, int64_t MinOffset, int64_t MaxOffset, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI, ScalarEvolution &SE) { // Fast-path: zero is always foldable. @@ -1254,32 +1327,6 @@ } }; -/// FormulaSorter - This class implements an ordering for formulae which sorts -/// the by their standalone cost. -class FormulaSorter { - /// These two sets are kept empty, so that we compute standalone costs. - DenseSet<const SCEV *> VisitedRegs; - SmallPtrSet<const SCEV *, 16> Regs; - Loop *L; - LSRUse *LU; - ScalarEvolution &SE; - DominatorTree &DT; - -public: - FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt) - : L(l), LU(&lu), SE(se), DT(dt) {} - - bool operator()(const Formula &A, const Formula &B) { - Cost CostA; - CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT); - Regs.clear(); - Cost CostB; - CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT); - Regs.clear(); - return CostA < CostB; - } -}; - /// LSRInstance - This class holds state for the main loop strength reduction /// logic. 
class LSRInstance { @@ -1301,7 +1348,7 @@ SmallSetVector<int64_t, 8> Factors; /// Types - Interesting use types, to facilitate truncation reuse. - SmallSetVector<const Type *, 4> Types; + SmallSetVector<Type *, 4> Types; /// Fixups - The list of operands which are to be replaced. SmallVector<LSRFixup, 16> Fixups; @@ -1332,13 +1379,13 @@ UseMapTy UseMap; bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy); + LSRUse::KindType Kind, Type *AccessTy); std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind, - const Type *AccessTy); + Type *AccessTy); - void DeleteUse(LSRUse &LU); + void DeleteUse(LSRUse &LU, size_t LUIdx); LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); @@ -1364,6 +1411,10 @@ void FilterOutUndesirableDedicatedRegisters(); size_t EstimateSearchSpaceComplexity() const; + void NarrowSearchSpaceByDetectingSupersets(); + void NarrowSearchSpaceByCollapsingUnrolledCode(); + void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, @@ -1424,7 +1475,8 @@ IVUsers::const_iterator CandidateUI = UI; ++UI; Instruction *ShadowUse = CandidateUI->getUser(); - const Type *DestTy = NULL; + Type *DestTy = NULL; + bool IsSigned = false; /* If shadow use is a int->float cast then insert a second IV to eliminate this cast. 
@@ -1438,10 +1490,14 @@ for (unsigned i = 0; i < n; ++i, ++d) foo(d); */ - if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) + if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) { + IsSigned = false; DestTy = UCast->getDestTy(); - else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) + } + else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) { + IsSigned = true; DestTy = SCast->getDestTy(); + } if (!DestTy) continue; if (TLI) { @@ -1455,7 +1511,7 @@ if (!PH) continue; if (PH->getNumIncomingValues() != 2) continue; - const Type *SrcTy = PH->getType(); + Type *SrcTy = PH->getType(); int Mantissa = DestTy->getFPMantissaWidth(); if (Mantissa == -1) continue; if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) @@ -1472,7 +1528,9 @@ ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; - Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + Constant *NewInit = ConstantFP::get(DestTy, IsSigned ? + (double)Init->getSExtValue() : + (double)Init->getZExtValue()); BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -1497,14 +1555,14 @@ if (!C->getValue().isStrictlyPositive()) continue; /* Add new PHINode. */ - PHINode *NewPH = PHINode::Create(DestTy, PH); + PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH); /* create new increment. '++d' in above example. */ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? Instruction::FAdd : Instruction::FSub, - NewPH, CFP, Incr); + NewPH, CFP, "IV.S.next.", Incr); NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); @@ -1597,7 +1655,7 @@ const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1); // Add one to the backedge-taken count to get the trip count. 
- const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount); if (IterationCount != SE.getSCEV(Sel)) return Cond; // Check for a max calculation that matches the pattern. There's no check @@ -1680,7 +1738,7 @@ // Ok, everything looks ok to change the condition into an SLT or SGE and // delete the max calculation. ICmpInst *NewCond = - new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS); + new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp"); // Delete the max calculation instructions. Cond->replaceAllUsesWith(NewCond); @@ -1774,7 +1832,7 @@ if (!TLI) goto decline_post_inc; // Check for possible scaled-address reuse. - const Type *AccessTy = getAccessType(UI->getUser()); + Type *AccessTy = getAccessType(UI->getUser()); TargetLowering::AddrMode AM; AM.Scale = C->getSExtValue(); if (TLI->isLegalAddressingMode(AM, AccessTy)) @@ -1833,15 +1891,15 @@ } } -/// reconcileNewOffset - Determine if the given use can accomodate a fixup +/// reconcileNewOffset - Determine if the given use can accommodate a fixup /// at the given offset and other details. If so, update the use and /// return true. bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy) { + LSRUse::KindType Kind, Type *AccessTy) { int64_t NewMinOffset = LU.MinOffset; int64_t NewMaxOffset = LU.MaxOffset; - const Type *NewAccessTy = AccessTy; + Type *NewAccessTy = AccessTy; // Check for a mismatched kind. It's tempting to collapse mismatched kinds to // something conservative, however this can pessimize in the case that one of @@ -1880,7 +1938,7 @@ /// Either reuse an existing use or create a new one, as needed. 
std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr, - LSRUse::KindType Kind, const Type *AccessTy) { + LSRUse::KindType Kind, Type *AccessTy) { const SCEV *Copy = Expr; int64_t Offset = ExtractImmediate(Expr, SE); @@ -1918,10 +1976,13 @@ } /// DeleteUse - Delete the given use from the Uses list. -void LSRInstance::DeleteUse(LSRUse &LU) { +void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) { if (&LU != &Uses.back()) std::swap(LU, Uses.back()); Uses.pop_back(); + + // Update RegUses. + RegUses.SwapAndDropUse(LUIdx, Uses.size()); } /// FindUseWithFormula - Look for a use distinct from OrigLU which is has @@ -1929,33 +1990,42 @@ LSRUse * LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, const LSRUse &OrigLU) { - // Search all uses for the formula. This could be more clever. Ignore - // ICmpZero uses because they may contain formulae generated by - // GenerateICmpZeroScales, in which case adding fixup offsets may - // be invalid. + // Search all uses for the formula. This could be more clever. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; + // Check whether this use is close enough to OrigLU, to see whether it's + // worthwhile looking through its formulae. + // Ignore ICmpZero uses because they may contain formulae generated by + // GenerateICmpZeroScales, in which case adding fixup offsets may + // be invalid. if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero && LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && LU.WidestFixupType == OrigLU.WidestFixupType && LU.HasFormulaWithSameRegs(OrigF)) { + // Scan through this use's formulae. for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; + // Check to see if this formula has the same registers and symbols + // as OrigF. 
if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && F.AM.BaseGV == OrigF.AM.BaseGV && F.AM.Scale == OrigF.AM.Scale && - LU.Kind) { + F.UnfoldedOffset == OrigF.UnfoldedOffset) { if (F.AM.BaseOffs == 0) return &LU; + // This is the formula where all the registers and symbols matched; + // there aren't going to be any others. Since we declined it, we + // can skip the rest of the formulae and procede to the next LSRUse. break; } } } } + // Nothing looked good. return 0; } @@ -2030,7 +2100,7 @@ LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; - const Type *AccessTy = 0; + Type *AccessTy = 0; if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { Kind = LSRUse::Address; AccessTy = getAccessType(LF.UserInst); @@ -2058,7 +2128,11 @@ // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); - if (N->isLoopInvariant(L)) { + if (SE.isLoopInvariant(N, L)) { + // S is normalized, so normalize N before folding it into S + // to keep the result normalized. + N = TransformForPostIncUse(Normalize, N, CI, 0, + LF.PostIncLoops, SE, DT); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); } @@ -2098,7 +2172,7 @@ void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; - F.InitialMatch(S, L, SE, DT); + F.InitialMatch(S, L, SE); bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Initial formula already exists!"); (void)Inserted; } @@ -2198,7 +2272,7 @@ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) { unsigned OtherIdx = !UI.getOperandNo(); Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx)); - if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L)) + if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L)) continue; } @@ -2226,23 +2300,24 @@ /// separate registers. If C is non-null, multiply each subexpression by C. 
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl<const SCEV *> &Ops, - SmallVectorImpl<const SCEV *> &UninterestingOps, const Loop *L, ScalarEvolution &SE) { if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE); + CollectSubexprs(*I, C, Ops, L, SE); return; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. if (!AR->getStart()->isZero()) { CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), - C, Ops, UninterestingOps, L, SE); - CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE); + AR->getLoop(), + //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap), + C, Ops, L, SE); + CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; } } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { @@ -2252,17 +2327,13 @@ dyn_cast<SCEVConstant>(Mul->getOperand(0))) { CollectSubexprs(Mul->getOperand(1), C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, - Ops, UninterestingOps, L, SE); + Ops, L, SE); return; } } - // Otherwise use the value itself. Loop-variant "unknown" values are - // uninteresting; we won't be able to do anything meaningful with them. - if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L)) - UninterestingOps.push_back(S); - else - Ops.push_back(C ? SE.getMulExpr(C, S) : S); + // Otherwise use the value itself, optionally with a scale applied. + Ops.push_back(C ? 
SE.getMulExpr(C, S) : S); } /// GenerateReassociations - Split out subexpressions from adds and the bases of @@ -2276,19 +2347,19 @@ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *BaseReg = Base.BaseRegs[i]; - SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps; - CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE); - - // Add any uninteresting values as one register, as we won't be able to - // form any interesting reassociation opportunities with them. They'll - // just have to be added inside the loop no matter what we do. - if (!UninterestingAddOps.empty()) - AddOps.push_back(SE.getAddExpr(UninterestingAddOps)); + SmallVector<const SCEV *, 8> AddOps; + CollectSubexprs(BaseReg, 0, AddOps, L, SE); if (AddOps.size() == 1) continue; for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), JE = AddOps.end(); J != JE; ++J) { + + // Loop-variant "unknown" values are uninteresting; we won't be able to + // do anything meaningful with them. + if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L)) + continue; + // Don't pull a constant into a register if the constant could be folded // into an immediate field. if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, @@ -2314,8 +2385,29 @@ if (InnerSum->isZero()) continue; Formula F = Base; - F.BaseRegs[i] = InnerSum; - F.BaseRegs.push_back(*J); + + // Add the remaining pieces of the add back into the new formula. + const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); + if (TLI && InnerSumSC && + SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && + TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { + F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue(); + F.BaseRegs.erase(F.BaseRegs.begin() + i); + } else + F.BaseRegs[i] = InnerSum; + + // Add J as its own register, or an unfolded immediate. 
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); + if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) + F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue(); + else + F.BaseRegs.push_back(*J); + if (InsertFormula(LU, LUIdx, F)) // If that formula hadn't been seen before, recurse to find more like // it. @@ -2337,8 +2429,8 @@ for (SmallVectorImpl<const SCEV *>::const_iterator I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) { const SCEV *BaseReg = *I; - if (BaseReg->properlyDominates(L->getHeader(), &DT) && - !BaseReg->hasComputableLoopEvolution(L)) + if (SE.properlyDominates(BaseReg, L->getHeader()) && + !SE.hasComputableLoopEvolution(BaseReg, L)) Ops.push_back(BaseReg); else F.BaseRegs.push_back(BaseReg); @@ -2396,7 +2488,7 @@ if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, LU.AccessTy, TLI)) { // Add the offset to the base register. - const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); // If it cancelled out, drop the base register, otherwise update it. if (NewG->isZero()) { std::swap(F.BaseRegs[i], F.BaseRegs.back()); @@ -2428,7 +2520,7 @@ if (LU.Kind != LSRUse::ICmpZero) return; // Determine the integer type for the base formula. - const Type *IntTy = Base.getType(); + Type *IntTy = Base.getType(); if (!IntTy) return; if (SE.getTypeSizeInBits(IntTy) > 64) return; @@ -2483,6 +2575,15 @@ continue; } + // Check that multiplying with the unfolded offset doesn't overflow. + if (F.UnfoldedOffset != 0) { + if (F.UnfoldedOffset == INT64_MIN && Factor == -1) + continue; + F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor; + if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset) + continue; + } + // If we make it here and it's legal, add it. 
(void)InsertFormula(LU, LUIdx, F); next:; @@ -2493,7 +2594,7 @@ /// scaled-offset address modes, for example. void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // Determine the integer type for the base formula. - const Type *IntTy = Base.getType(); + Type *IntTy = Base.getType(); if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. @@ -2553,13 +2654,13 @@ if (Base.AM.BaseGV) return; // Determine the integer type for the base formula. - const Type *DstTy = Base.getType(); + Type *DstTy = Base.getType(); if (!DstTy) return; DstTy = SE.getEffectiveSCEVType(DstTy); - for (SmallSetVector<const Type *, 4>::const_iterator + for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { - const Type *SrcTy = *I; + Type *SrcTy = *I; if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { Formula F = Base; @@ -2665,7 +2766,7 @@ // other orig regs. ImmMapTy::const_iterator OtherImms[] = { Imms.begin(), prior(Imms.end()), - Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2) + Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2) }; for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) { ImmMapTy::const_iterator M = OtherImms[i]; @@ -2696,7 +2797,7 @@ int64_t Imm = WI.Imm; const SCEV *OrigReg = WI.OrigReg; - const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); + Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm)); unsigned BitWidth = SE.getTypeSizeInBits(IntTy); @@ -2722,7 +2823,7 @@ // value to the immediate would produce a value closer to zero than the // immediate itself, then the formula isn't worthwhile. 
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) - if (C->getValue()->getValue().isNegative() != + if (C->getValue()->isNegative() != (NewF.AM.BaseOffs < 0) && (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) .ule(abs64(NewF.AM.BaseOffs))) @@ -2739,8 +2840,13 @@ Formula NewF = F; NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm; if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) - continue; + LU.Kind, LU.AccessTy, TLI)) { + if (!TLI || + !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) + continue; + NewF = F; + NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm; + } NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg); // If the new formula has a constant in a register, and adding the @@ -2797,11 +2903,17 @@ } GenerateCrossUseConstantOffsets(); + + DEBUG(dbgs() << "\n" + "After generating reuse formulae:\n"; + print_uses(dbgs())); } -/// If their are multiple formulae with the same set of registers used +/// If there are multiple formulae with the same set of registers used /// by other uses, pick the best one and delete the others. 
void LSRInstance::FilterOutUndesirableDedicatedRegisters() { + DenseSet<const SCEV *> VisitedRegs; + SmallPtrSet<const SCEV *, 16> Regs; #ifndef NDEBUG bool ChangedFormulae = false; #endif @@ -2814,7 +2926,6 @@ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; - FormulaSorter Sorter(L, LU, SE, DT); DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n'); bool Any = false; @@ -2840,7 +2951,14 @@ BestFormulae.insert(std::make_pair(Key, FIdx)); if (!P.second) { Formula &Best = LU.Formulae[P.first->second]; - if (Sorter.operator()(F, Best)) + + Cost CostF; + CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT); + Regs.clear(); + Cost CostBest; + CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT); + Regs.clear(); + if (CostF < CostBest) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" @@ -2880,7 +2998,7 @@ /// this many solutions because it prune the search space, but the pruning /// isn't always sufficient. size_t LSRInstance::EstimateSearchSpaceComplexity() const { - uint32_t Power = 1; + size_t Power = 1; for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end(); I != E; ++I) { size_t FSize = I->Formulae.size(); @@ -2895,11 +3013,11 @@ return Power; } -/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of -/// formulae to choose from, use some rough heuristics to prune down the number -/// of formulae. This keeps the main solver from taking an extraordinary amount -/// of time in some worst-case scenarios. -void LSRInstance::NarrowSearchSpaceUsingHeuristics() { +/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset +/// of the registers of another formula, it won't help reduce register +/// pressure (though it may not necessarily hurt register pressure); remove +/// it to simplify the system. 
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2957,7 +3075,12 @@ DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} +/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers +/// for expressions like A, A+1, A+2, etc., allocate a single register for +/// them. +void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2982,6 +3105,28 @@ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + // Update the relocs to reference the new use. + for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.AM.BaseOffs; + // Add the new offset to LUThatHas' offset list. + if (LUThatHas->Offsets.back() != Fixup.Offset) { + LUThatHas->Offsets.push_back(Fixup.Offset); + if (Fixup.Offset > LUThatHas->MaxOffset) + LUThatHas->MaxOffset = Fixup.Offset; + if (Fixup.Offset < LUThatHas->MinOffset) + LUThatHas->MinOffset = Fixup.Offset; + } + DEBUG(dbgs() << "New fixup has offset " + << Fixup.Offset << '\n'); + } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; + } + // Delete formulae from the new use which are no longer legal. bool Any = false; for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { @@ -3000,22 +3145,8 @@ if (Any) LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); - // Update the relocs to reference the new use. 
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), - E = Fixups.end(); I != E; ++I) { - LSRFixup &Fixup = *I; - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.AM.BaseOffs; - DEBUG(dbgs() << "New fixup has offset " - << Fixup.Offset << '\n'); - } - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; - } - // Delete the old use. - DeleteUse(LU); + DeleteUse(LU, LUIdx); --LUIdx; --NumUses; break; @@ -3028,7 +3159,30 @@ DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that +/// we've done more filtering, as it may be able to find more formulae to +/// eliminate. +void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); + + DEBUG(dbgs() << "Narrowing the search space by re-filtering out " + "undesirable dedicated registers.\n"); + + FilterOutUndesirableDedicatedRegisters(); + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } +} + +/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely +/// to be profitable, and then in any use which has any reference to that +/// register, delete all formulae which do not reference that register. +void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // With all other options exhausted, loop until the system is simple // enough to handle. SmallPtrSet<const SCEV *, 4> Taken; @@ -3090,6 +3244,17 @@ } } +/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of +/// formulae to choose from, use some rough heuristics to prune down the number +/// of formulae. This keeps the main solver from taking an extraordinary amount +/// of time in some worst-case scenarios. 
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() { + NarrowSearchSpaceByDetectingSupersets(); + NarrowSearchSpaceByCollapsingUnrolledCode(); + NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + NarrowSearchSpaceByPickingWinnerRegs(); +} + /// SolveRecurse - This is the recursive solver. void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, Cost &SolutionCost, @@ -3166,6 +3331,9 @@ skip:; } + if (!EnableRetry && !AnySatisfiedReqRegs) + return; + // If none of the formulae had all of the required registers, relax the // constraint so that we don't exclude all formulae. if (!AnySatisfiedReqRegs) { @@ -3189,6 +3357,10 @@ // SolveRecurse does all the work. SolveRecurse(Solution, SolutionCost, Workspace, CurCost, CurRegs, VisitedRegs); + if (Solution.empty()) { + DEBUG(dbgs() << "\nNo Satisfactory Solution\n"); + return; + } // Ok, we've now made all our decisions. DEBUG(dbgs() << "\n" @@ -3307,8 +3479,11 @@ // Don't insert instructions before PHI nodes. while (isa<PHINode>(IP)) ++IP; + // Ignore landingpad instructions. + while (isa<LandingPadInst>(IP)) ++IP; + // Ignore debug intrinsics. - while (ISA_DEBUG_INFO_INTRINSIC(IP)) ++IP; + while (isa<DbgInfoIntrinsic>(IP)) ++IP; return IP; } @@ -3331,9 +3506,9 @@ Rewriter.setPostInc(LF.PostIncLoops); // This is the type that the user actually needs. - const Type *OpTy = LF.OperandValToReplace->getType(); + Type *OpTy = LF.OperandValToReplace->getType(); // This will be the type that we'll initially expand to. - const Type *Ty = F.getType(); + Type *Ty = F.getType(); if (!Ty) // No type known; just expand directly to the ultimate type. Ty = OpTy; @@ -3341,7 +3516,7 @@ // Expand directly to the ultimate type if it's the right size. Ty = OpTy; // This is the type to do integer arithmetic in. - const Type *IntTy = SE.getEffectiveSCEVType(Ty); + Type *IntTy = SE.getEffectiveSCEVType(Ty); // Build up a list of operands to add together to form the full base. 
SmallVector<const SCEV *, 8> Ops; @@ -3418,7 +3593,7 @@ // The other interesting way of "folding" with an ICmpZero is to use a // negated immediate. if (!ICmpScaledV) - ICmpScaledV = ConstantInt::get(IntTy, -Offset); + ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset); else { Ops.push_back(SE.getUnknown(ICmpScaledV)); ICmpScaledV = ConstantInt::get(IntTy, Offset); @@ -3430,6 +3605,14 @@ } } + // Expand the unfolded offset portion. + int64_t UnfoldedOffset = F.UnfoldedOffset; + if (UnfoldedOffset != 0) { + // Just add the immediate values. + Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, + UnfoldedOffset))); + } + // Emit instructions summing all the operands. const SCEV *FullS = Ops.empty() ? SE.getConstant(IntTy, 0) : @@ -3452,7 +3635,7 @@ Instruction *Cast = CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false), - ICmpScaledV, OpTy, CI); + ICmpScaledV, OpTy, "tmp", CI); ICmpScaledV = Cast; } CI->setOperand(1, ICmpScaledV); @@ -3493,21 +3676,33 @@ // is the canonical backedge for this loop, which complicates post-inc // users. if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(BB->getTerminator()) && - (PN->getParent() != L->getHeader() || !L->contains(BB))) { - // Split the critical edge. - BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P); + !isa<IndirectBrInst>(BB->getTerminator())) { + BasicBlock *Parent = PN->getParent(); + Loop *PNLoop = LI.getLoopFor(Parent); + if (!PNLoop || Parent != PNLoop->getHeader()) { + // Split the critical edge. 
+ BasicBlock *NewBB = 0; + if (!Parent->isLandingPad()) { + NewBB = SplitCriticalEdge(BB, Parent, P, + /*MergeIdenticalEdges=*/true, + /*DontDeleteUselessPhis=*/true); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs); + NewBB = NewBBs[0]; + } - // If PN is outside of the loop and BB is in the loop, we want to - // move the block to be immediately before the PHI block, not - // immediately after BB. - if (L->contains(BB) && !L->contains(PN)) - NewBB->moveBefore(PN->getParent()); + // If PN is outside of the loop and BB is in the loop, we want to + // move the block to be immediately before the PHI block, not + // immediately after BB. + if (L->contains(BB) && !L->contains(PN)) + NewBB->moveBefore(PN->getParent()); - // Splitting the edge can reduce the number of PHI entries we have. - e = PN->getNumIncomingValues(); - BB = NewBB; - i = PN->getBasicBlockIndex(BB); + // Splitting the edge can reduce the number of PHI entries we have. + e = PN->getNumIncomingValues(); + BB = NewBB; + i = PN->getBasicBlockIndex(BB); + } } std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair = @@ -3518,13 +3713,13 @@ Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. - const Type *OpTy = LF.OperandValToReplace->getType(); + Type *OpTy = LF.OperandValToReplace->getType(); if (FullV->getType() != OpTy) FullV = CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), FullV, LF.OperandValToReplace->getType(), - BB->getTerminator()); + "tmp", BB->getTerminator()); PN->setIncomingValue(i, FullV); Pair.first->second = FullV; @@ -3548,11 +3743,11 @@ Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. 
- const Type *OpTy = LF.OperandValToReplace->getType(); + Type *OpTy = LF.OperandValToReplace->getType(); if (FullV->getType() != OpTy) { Instruction *Cast = CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), - FullV, OpTy, LF.UserInst); + FullV, OpTy, "tmp", LF.UserInst); FullV = Cast; } @@ -3579,8 +3774,9 @@ // we can remove them after we are done working. SmallVector<WeakVH, 16> DeadInsts; - SCEVExpander Rewriter(SE); + SCEVExpander Rewriter(SE, "lsr"); Rewriter.disableCanonicalMode(); + Rewriter.enableLSRMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); // Expand the new value definitions and update the users. @@ -3621,6 +3817,23 @@ OptimizeShadowIV(); OptimizeLoopTermCond(); + // If loop preparation eliminates all interesting IV users, bail. + if (IU.empty()) return; + + // Skip nested loops until we can model them better with formulae. + if (!EnableNested && !L->empty()) { + + if (EnablePhiElim) { + // Remove any extra phis created by processing inner loops. + SmallVector<WeakVH, 16> DeadInsts; + SCEVExpander Rewriter(SE, "lsr"); + Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); + Changed |= DeleteTriviallyDeadInstructions(DeadInsts); + } + DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); + return; + } + // Start collecting data and preparing for the solver. CollectInterestingTypesAndFactors(); CollectFixupsAndInitialFormulae(); @@ -3633,10 +3846,6 @@ // to formulate the values needed for the uses. GenerateAllReuseFormulae(); - DEBUG(dbgs() << "\n" - "After generating reuse formulae:\n"; - print_uses(dbgs())); - FilterOutUndesirableDedicatedRegisters(); NarrowSearchSpaceUsingHeuristics(); @@ -3648,6 +3857,9 @@ Types.clear(); RegUses.clear(); + if (Solution.empty()) + return; + #ifndef NDEBUG // Formulae should be legal. for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), @@ -3663,6 +3875,14 @@ // Now that we've decided what we want, make it so. 
ImplementSolution(Solution, P); + + if (EnablePhiElim) { + // Remove any extra phis created by processing inner loops. + SmallVector<WeakVH, 16> DeadInsts; + SCEVExpander Rewriter(SE, "lsr"); + Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); + Changed |= DeleteTriviallyDeadInstructions(DeadInsts); + } } void LSRInstance::print_factors_and_types(raw_ostream &OS) const { @@ -3678,7 +3898,7 @@ OS << '*' << *I; } - for (SmallSetVector<const Type *, 4>::const_iterator + for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { if (!First) OS << ", "; First = false; @@ -3743,21 +3963,30 @@ } char LoopStrengthReduce::ID = 0; -INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce", - "Loop Strength Reduction", false, false); +INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(IVUsers) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false) + Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { return new LoopStrengthReduce(TLI); } LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(ID), TLI(tli) {} + : LoopPass(ID), TLI(tli) { + initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); + } void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. 
AU.addPreservedID(LoopSimplifyID); - AU.addPreserved("domfrontier"); AU.addRequired<LoopInfo>(); AU.addPreserved<LoopInfo>(); @@ -3766,6 +3995,9 @@ AU.addPreserved<DominatorTree>(); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); + // Requiring LoopSimplify a second time here prevents IVUsers from running + // twice, since LoopSimplify was invalidated by running ScalarEvolution. + AU.addRequiredID(LoopSimplifyID); AU.addRequired<IVUsers>(); AU.addPreserved<IVUsers>(); }
diff --git a/src/LLVM/lib/Transforms/Scalar/LoopUnrollPass.cpp b/src/LLVM/lib/Transforms/Scalar/LoopUnrollPass.cpp new file mode 100644 index 0000000..91395b2 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -0,0 +1,214 @@ +//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements a simple loop unroller. It works best when loops have +// been canonicalized by the -indvars pass, allowing it to determine the trip +// counts of loops easily. +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-unroll" +#include "llvm/IntrinsicInst.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Target/TargetData.h" +#include <climits> + +using namespace llvm; + +static cl::opt<unsigned> +UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, + cl::desc("The cut-off point for automatic loop unrolling")); + +static cl::opt<unsigned> +UnrollCount("unroll-count", cl::init(0), cl::Hidden, + cl::desc("Use this unroll count for all loops, for testing purposes")); + +static cl::opt<bool> +UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, + cl::desc("Allows loops to be partially unrolled until " + "-unroll-threshold loop size is reached.")); + +// Temporary flag to be removed in 3.0 +static cl::opt<bool> +NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden, + cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling")); + +namespace { + class LoopUnroll : public LoopPass { + public: + static char ID; // Pass ID, replacement for typeid + LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) { + 
CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); + CurrentCount = (C == -1) ? UnrollCount : unsigned(C); + CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; + + UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); + + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); + } + + /// A magic value for use with the Threshold parameter to indicate + /// that the loop unroll should be performed regardless of how much + /// code expansion would result. + static const unsigned NoThreshold = UINT_MAX; + + // Threshold to use when optsize is specified (and there is no + // explicit -unroll-threshold). + static const unsigned OptSizeUnrollThreshold = 50; + + unsigned CurrentCount; + unsigned CurrentThreshold; + bool CurrentAllowPartial; + bool UserThreshold; // CurrentThreshold is user-specified. + + bool runOnLoop(Loop *L, LPPassManager &LPM); + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG... + /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired<ScalarEvolution>(); + AU.addPreserved<ScalarEvolution>(); + // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. + // If loop unroll does not preserve dom info then LCSSA pass on next + // loop will receive invalid dom info. + // For now, recreate dom info, if loop is unrolled. 
+ AU.addPreserved<DominatorTree>(); + } + }; +} + +char LoopUnroll::ID = 0; +INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) + +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { + return new LoopUnroll(Threshold, Count, AllowPartial); +} + +/// ApproximateLoopSize - Approximate the size of the loop. +static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, + const TargetData *TD) { + CodeMetrics Metrics; + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + Metrics.analyzeBasicBlock(*I, TD); + NumCalls = Metrics.NumInlineCandidates; + + unsigned LoopSize = Metrics.NumInsts; + + // Don't allow an estimate of size zero. This would allows unrolling of loops + // with huge iteration counts, which is a compile time problem even if it's + // not a problem for code quality. + if (LoopSize == 0) LoopSize = 1; + + return LoopSize; +} + +bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { + LoopInfo *LI = &getAnalysis<LoopInfo>(); + ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); + + BasicBlock *Header = L->getHeader(); + DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() + << "] Loop %" << Header->getName() << "\n"); + (void)Header; + + // Determine the current unrolling threshold. While this is normally set + // from UnrollThreshold, it is overridden to a smaller value if the current + // function is marked as optimize-for-size, and the unroll threshold was + // not user specified. 
+ unsigned Threshold = CurrentThreshold; + if (!UserThreshold && + Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Threshold = OptSizeUnrollThreshold; + + // Find trip count and trip multiple if count is not available + unsigned TripCount = 0; + unsigned TripMultiple = 1; + if (!NoSCEVUnroll) { + // Find "latch trip count". UnrollLoop assumes that control cannot exit + // via the loop latch on any iteration prior to TripCount. The loop may exit + // early via an earlier branch. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + TripCount = SE->getSmallConstantTripCount(L, LatchBlock); + TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); + } + } + else { + TripCount = L->getSmallConstantTripCount(); + if (TripCount == 0) + TripMultiple = L->getSmallConstantTripMultiple(); + } + // Automatically select an unroll count. + unsigned Count = CurrentCount; + if (Count == 0) { + // Conservative heuristic: if we know the trip count, see if we can + // completely unroll (subject to the threshold, checked below); otherwise + // try to find greatest modulo of the trip count which is still under + // threshold value. + if (TripCount == 0) + return false; + Count = TripCount; + } + + // Enforce the threshold. 
+ if (Threshold != NoThreshold) { + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + unsigned NumInlineCandidates; + unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD); + DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + if (NumInlineCandidates != 0) { + DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); + return false; + } + uint64_t Size = (uint64_t)LoopSize*Count; + if (TripCount != 1 && Size > Threshold) { + DEBUG(dbgs() << " Too large to fully unroll with count: " << Count + << " because size: " << Size << ">" << Threshold << "\n"); + if (!CurrentAllowPartial) { + DEBUG(dbgs() << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"); + return false; + } + // Reduce unroll count to be modulo of TripCount for partial unrolling + Count = Threshold / LoopSize; + while (Count != 0 && TripCount%Count != 0) { + Count--; + } + if (Count < 2) { + DEBUG(dbgs() << " could not unroll partially\n"); + return false; + } + DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n"); + } + } + + // Unroll the loop. + if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM)) + return false; + + return true; +}
diff --git a/src/LLVM/lib/Transforms/Scalar/LoopUnswitch.cpp b/src/LLVM/lib/Transforms/Scalar/LoopUnswitch.cpp new file mode 100644 index 0000000..458949c --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -0,0 +1,1083 @@ +//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass transforms loops that contain branches on loop-invariant conditions +// to have multiple loops. For example, it turns the left into the right code: +// +// for (...) if (lic) +// A for (...) +// if (lic) A; B; C +// B else +// C for (...) +// A; C +// +// This can increase the size of the code exponentially (doubling it every time +// a loop is unswitched) so we only unswitch if the resultant code will be +// smaller than a threshold. +// +// This pass expects LICM to be run before it to hoist invariant conditions out +// of the loop, to make the unswitching opportunity obvious. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-unswitch" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <set> +using namespace llvm; + +STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSwitches, "Number of switches 
unswitched"); +STATISTIC(NumSelects , "Number of selects unswitched"); +STATISTIC(NumTrivial , "Number of unswitches that are trivial"); +STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); + +// The specific value of 50 here was chosen based only on intuition and a +// few specific examples. +static cl::opt<unsigned> +Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), + cl::init(50), cl::Hidden); + +namespace { + class LoopUnswitch : public LoopPass { + LoopInfo *LI; // Loop information + LPPassManager *LPM; + + // LoopProcessWorklist - Used to check if second loop needs processing + // after RewriteLoopBodyWithConditionConstant rewrites first loop. + std::vector<Loop*> LoopProcessWorklist; + SmallPtrSet<Value *,8> UnswitchedVals; + + bool OptimizeForSize; + bool redoLoop; + + Loop *currentLoop; + DominatorTree *DT; + BasicBlock *loopHeader; + BasicBlock *loopPreheader; + + // LoopBlocks contains all of the basic blocks of the loop, including the + // preheader of the loop, the body of the loop, and the exit blocks of the + // loop, in that order. + std::vector<BasicBlock*> LoopBlocks; + // NewBlocks contained cloned copy of basic blocks from LoopBlocks. + std::vector<BasicBlock*> NewBlocks; + + public: + static char ID; // Pass ID, replacement for typeid + explicit LoopUnswitch(bool Os = false) : + LoopPass(ID), OptimizeForSize(Os), redoLoop(false), + currentLoop(NULL), DT(NULL), loopHeader(NULL), + loopPreheader(NULL) { + initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM); + bool processCurrentLoop(); + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. 
+ /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addPreserved<DominatorTree>(); + AU.addPreserved<ScalarEvolution>(); + } + + private: + + virtual void releaseMemory() { + UnswitchedVals.clear(); + } + + /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, + /// remove it. + void RemoveLoopFromWorklist(Loop *L) { + std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(), + LoopProcessWorklist.end(), L); + if (I != LoopProcessWorklist.end()) + LoopProcessWorklist.erase(I); + } + + void initLoopData() { + loopHeader = currentLoop->getHeader(); + loopPreheader = currentLoop->getLoopPreheader(); + } + + /// Split all of the edges from inside the loop to their exit blocks. + /// Update the appropriate Phi nodes as we do so. + void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks); + + bool UnswitchIfProfitable(Value *LoopCond, Constant *Val); + void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, + BasicBlock *ExitBlock); + void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L); + + void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, + Constant *Val, bool isEqual); + + void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, + BasicBlock *TrueDest, + BasicBlock *FalseDest, + Instruction *InsertPt); + + void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L); + void RemoveBlockIfDead(BasicBlock *BB, + std::vector<Instruction*> &Worklist, Loop *l); + void RemoveLoopFromHierarchy(Loop *L); + bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0, + BasicBlock **LoopExit = 0); + + }; +} +char LoopUnswitch::ID = 0; +INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", + false, false) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 
+INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", + false, false) + +Pass *llvm::createLoopUnswitchPass(bool Os) { + return new LoopUnswitch(Os); +} + +/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is +/// invariant in the loop, or has an invariant piece, return the invariant. +/// Otherwise, return null. +static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { + // We can never unswitch on vector conditions. + if (Cond->getType()->isVectorTy()) + return 0; + + // Constants should be folded, not unswitched on! + if (isa<Constant>(Cond)) return 0; + + // TODO: Handle: br (VARIANT|INVARIANT). + + // Hoist simple values out. + if (L->makeLoopInvariant(Cond, Changed)) + return Cond; + + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond)) + if (BO->getOpcode() == Instruction::And || + BO->getOpcode() == Instruction::Or) { + // If either the left or right side is invariant, we can unswitch on this, + // which will cause the branch to go away in one loop and the condition to + // simplify in the other one. + if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed)) + return LHS; + if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed)) + return RHS; + } + + return 0; +} + +bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { + LI = &getAnalysis<LoopInfo>(); + LPM = &LPM_Ref; + DT = getAnalysisIfAvailable<DominatorTree>(); + currentLoop = L; + Function *F = currentLoop->getHeader()->getParent(); + bool Changed = false; + do { + assert(currentLoop->isLCSSAForm(*DT)); + redoLoop = false; + Changed |= processCurrentLoop(); + } while(redoLoop); + + if (Changed) { + // FIXME: Reconstruct dom info, because it is not preserved properly. + if (DT) + DT->runOnFunction(*F); + } + return Changed; +} + +/// processCurrentLoop - Do actual work and unswitch loop if possible +/// and profitable. 
+bool LoopUnswitch::processCurrentLoop() { + bool Changed = false; + LLVMContext &Context = currentLoop->getHeader()->getContext(); + + // Loop over all of the basic blocks in the loop. If we find an interior + // block that is branching on a loop-invariant condition, we can unswitch this + // loop. + for (Loop::block_iterator I = currentLoop->block_begin(), + E = currentLoop->block_end(); I != E; ++I) { + TerminatorInst *TI = (*I)->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + // If this isn't branching on an invariant condition, we can't unswitch + // it. + if (BI->isConditional()) { + // See if this, or some part of it, is loop invariant. If so, we can + // unswitch on it if we desire. + Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), + currentLoop, Changed); + if (LoopCond && UnswitchIfProfitable(LoopCond, + ConstantInt::getTrue(Context))) { + ++NumBranches; + return true; + } + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), + currentLoop, Changed); + if (LoopCond && SI->getNumCases() > 1) { + // Find a value to unswitch on: + // FIXME: this should chose the most expensive case! + // FIXME: scan for a case with a non-critical edge? + Constant *UnswitchVal = SI->getCaseValue(1); + // Do not process same value again and again. + if (!UnswitchedVals.insert(UnswitchVal)) + continue; + + if (UnswitchIfProfitable(LoopCond, UnswitchVal)) { + ++NumSwitches; + return true; + } + } + } + + // Scan the instructions to check for unswitchable values. 
+ for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); + BBI != E; ++BBI) + if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) { + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), + currentLoop, Changed); + if (LoopCond && UnswitchIfProfitable(LoopCond, + ConstantInt::getTrue(Context))) { + ++NumSelects; + return true; + } + } + } + return Changed; +} + +/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the +/// loop with no side effects (including infinite loops). +/// +/// If true, we return true and set ExitBB to the block we +/// exit through. +/// +static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, + BasicBlock *&ExitBB, + std::set<BasicBlock*> &Visited) { + if (!Visited.insert(BB).second) { + // Already visited. Without more analysis, this could indicate an infinte loop. + return false; + } else if (!L->contains(BB)) { + // Otherwise, this is a loop exit, this is fine so long as this is the + // first exit. + if (ExitBB != 0) return false; + ExitBB = BB; + return true; + } + + // Otherwise, this is an unvisited intra-loop node. Check all successors. + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { + // Check to see if the successor is a trivial loop exit. + if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited)) + return false; + } + + // Okay, everything after this looks good, check to make sure that this block + // doesn't include any side effects. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (I->mayHaveSideEffects()) + return false; + + return true; +} + +/// isTrivialLoopExitBlock - Return true if the specified block unconditionally +/// leads to an exit from the specified loop, and has no side-effects in the +/// process. If so, return the block that is exited to, otherwise return null. 
+static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { + std::set<BasicBlock*> Visited; + Visited.insert(L->getHeader()); // Branches to header make infinite loops. + BasicBlock *ExitBB = 0; + if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) + return ExitBB; + return 0; +} + +/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is +/// trivial: that is, that the condition controls whether or not the loop does +/// anything at all. If this is a trivial condition, unswitching produces no +/// code duplications (equivalently, it produces a simpler loop and a new empty +/// loop, which gets deleted). +/// +/// If this is a trivial condition, return true, otherwise return false. When +/// returning true, this sets Cond and Val to the condition that controls the +/// trivial condition: when Cond dynamically equals Val, the loop is known to +/// exit. Finally, this sets LoopExit to the BB that the loop exits to when +/// Cond == Val. +/// +bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, + BasicBlock **LoopExit) { + BasicBlock *Header = currentLoop->getHeader(); + TerminatorInst *HeaderTerm = Header->getTerminator(); + LLVMContext &Context = Header->getContext(); + + BasicBlock *LoopExitBB = 0; + if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) { + // If the header block doesn't end with a conditional branch on Cond, we + // can't handle it. + if (!BI->isConditional() || BI->getCondition() != Cond) + return false; + + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any + // side-effects. If so, determine the value of Cond that causes it to do + // this. 
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + BI->getSuccessor(0)))) { + if (Val) *Val = ConstantInt::getTrue(Context); + } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + BI->getSuccessor(1)))) { + if (Val) *Val = ConstantInt::getFalse(Context); + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) { + // If this isn't a switch on Cond, we can't handle it. + if (SI->getCondition() != Cond) return false; + + // Check to see if a successor of the switch is guaranteed to go to the + // latch block or exit through a one exit block without having any + // side-effects. If so, determine the value of Cond that causes it to do + // this. Note that we can't trivially unswitch on the default case. + for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) + if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + SI->getSuccessor(i)))) { + // Okay, we found a trivial case, remember the value that is trivial. + if (Val) *Val = SI->getCaseValue(i); + break; + } + } + + // If we didn't find a single unique LoopExit block, or if the loop exit block + // contains phi nodes, this isn't trivial. + if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) + return false; // Can't handle this. + + if (LoopExit) *LoopExit = LoopExitBB; + + // We already know that nothing uses any scalar values defined inside of this + // loop. As such, we just have to check to see if this loop will execute any + // side-effecting instructions (e.g. stores, calls, volatile loads) in the + // part of the loop that the code *would* execute. We already checked the + // tail, check the header now. + for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I) + if (I->mayHaveSideEffects()) + return false; + return true; +} + +/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when +/// LoopCond == Val to simplify the loop. If we decide that this is profitable, +/// unswitch the loop, reprocess the pieces, then return true. 
+bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { + + initLoopData(); + + // If LoopSimplify was unable to form a preheader, don't do any unswitching. + if (!loopPreheader) + return false; + + Function *F = loopHeader->getParent(); + + Constant *CondVal = 0; + BasicBlock *ExitBlock = 0; + if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { + // If the condition is trivial, always unswitch. There is no code growth + // for this case. + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock); + return true; + } + + // Check to see if it would be profitable to unswitch current loop. + + // Do not do non-trivial unswitch while optimizing for size. + if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) + return false; + + // FIXME: This is overly conservative because it does not take into + // consideration code simplification opportunities and code that can + // be shared by the resultant unswitched loops. + CodeMetrics Metrics; + for (Loop::block_iterator I = currentLoop->block_begin(), + E = currentLoop->block_end(); + I != E; ++I) + Metrics.analyzeBasicBlock(*I); + + // Limit the number of instructions to avoid causing significant code + // expansion, and the number of basic blocks, to avoid loops with + // large numbers of branches which cause loop unswitching to go crazy. + // This is a very ad-hoc heuristic. + if (Metrics.NumInsts > Threshold || + Metrics.NumBlocks * 5 > Threshold || + Metrics.containsIndirectBr || Metrics.isRecursive) { + DEBUG(dbgs() << "NOT unswitching loop %" + << currentLoop->getHeader()->getName() << ", cost too high: " + << currentLoop->getBlocks().size() << "\n"); + return false; + } + + UnswitchNontrivialCondition(LoopCond, Val, currentLoop); + return true; +} + +/// CloneLoop - Recursively clone the specified loop and all of its children, +/// mapping the blocks with the specified map. 
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, + LoopInfo *LI, LPPassManager *LPM) { + Loop *New = new Loop(); + LPM->insertLoop(New, PL); + + // Add all of the blocks in L to the new loop. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + if (LI->getLoopFor(*I) == L) + New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase()); + + // Add all of the subloops to the new loop. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + CloneLoop(*I, New, VM, LI, LPM); + + return New; +} + +/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values +/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the +/// code immediately before InsertPt. +void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, + BasicBlock *TrueDest, + BasicBlock *FalseDest, + Instruction *InsertPt) { + // Insert a conditional branch on LIC to the two preheaders. The original + // code is the true version and the new code is the false version. + Value *BranchVal = LIC; + if (!isa<ConstantInt>(Val) || + Val->getType() != Type::getInt1Ty(LIC->getContext())) + BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val); + else if (Val != ConstantInt::getTrue(Val->getContext())) + // We want to enter the new loop when the condition is true. + std::swap(TrueDest, FalseDest); + + // Insert the new branch. + BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); + + // If either edge is critical, split it. This helps preserve LoopSimplify + // form for enclosing loops. + SplitCriticalEdge(BI, 0, this); + SplitCriticalEdge(BI, 1, this); +} + +/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable +/// condition in it (a cond branch from its header block to its latch block, +/// where the path through the loop that doesn't execute its body has no +/// side-effects), unswitch it. 
This doesn't involve any code duplication, just +/// moving the conditional branch outside of the loop and updating loop info. +void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, + Constant *Val, + BasicBlock *ExitBlock) { + DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" + << loopHeader->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " << L->getHeader()->getParent()->getName() + << " on cond: " << *Val << " == " << *Cond << "\n"); + + // First step, split the preheader, so that we know that there is a safe place + // to insert the conditional branch. We will change loopPreheader to have a + // conditional branch on Cond. + BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this); + + // Now that we have a place to insert the conditional branch, create a place + // to branch to: this is the exit block out of the loop that we should + // short-circuit to. + + // Split this block now, so that the loop maintains its exit block, and so + // that the jump from the preheader can execute the contents of the exit block + // without actually branching to it (the exit block should be dominated by the + // loop header, not the preheader). + assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); + BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this); + + // Okay, now we have a position to branch from and a position to branch to, + // insert the new conditional branch. + EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, + loopPreheader->getTerminator()); + LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L); + loopPreheader->getTerminator()->eraseFromParent(); + + // We need to reprocess this loop, it could be unswitched again. + redoLoop = true; + + // Now that we know that the loop is never entered when this condition is a + // particular value, rewrite the loop with this info. We know that this will + // at least eliminate the old branch. 
+ RewriteLoopBodyWithConditionConstant(L, Cond, Val, false); + ++NumTrivial; +} + +/// SplitExitEdges - Split all of the edges from inside the loop to their exit +/// blocks. Update the appropriate Phi nodes as we do so. +void LoopUnswitch::SplitExitEdges(Loop *L, + const SmallVector<BasicBlock *, 8> &ExitBlocks){ + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), + pred_end(ExitBlock)); + + // Although SplitBlockPredecessors doesn't preserve loop-simplify in + // general, if we call it on all predecessors of all exits then it does. + if (!ExitBlock->isLandingPad()) { + SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(), + ".us-lcssa", this); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa", + this, NewBBs); + } + } +} + +/// UnswitchNontrivialCondition - We determined that the loop is profitable +/// to unswitch when LIC equal Val. Split it into loop versions and test the +/// condition outside of either loop. Return the loops created as Out1/Out2. +void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, + Loop *L) { + Function *F = loopHeader->getParent(); + DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" + << loopHeader->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " << F->getName() + << " when '" << *Val << "' == " << *LIC << "\n"); + + if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) + SE->forgetLoop(L); + + LoopBlocks.clear(); + NewBlocks.clear(); + + // First step, split the preheader and exit blocks, and add these blocks to + // the LoopBlocks list. + BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this); + LoopBlocks.push_back(NewPreheader); + + // We want the loop to come after the preheader, but before the exit blocks. 
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); + + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + + // Split all of the edges from inside the loop to their exit blocks. Update + // the appropriate Phi nodes as we do so. + SplitExitEdges(L, ExitBlocks); + + // The exit blocks may have been changed due to edge splitting, recompute. + ExitBlocks.clear(); + L->getUniqueExitBlocks(ExitBlocks); + + // Add exit blocks to the loop blocks. + LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); + + // Next step, clone all of the basic blocks that make up the loop (including + // the loop preheader and exit blocks), keeping track of the mapping between + // the instructions and blocks. + NewBlocks.reserve(LoopBlocks.size()); + ValueToValueMapTy VMap; + for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { + BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); + NewBlocks.push_back(NewBB); + VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. + LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); + } + + // Splice the newly inserted blocks into the function right before the + // original preheader. + F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(), + NewBlocks[0], F->end()); + + // Now we create the new Loop object for the versioned loop. + Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM); + Loop *ParentLoop = L->getParentLoop(); + if (ParentLoop) { + // Make sure to add the cloned preheader and exit blocks to the parent loop + // as well. + ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase()); + } + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]); + // The new exit block should be in the same loop as the old one. 
+ if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) + ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase()); + + assert(NewExit->getTerminator()->getNumSuccessors() == 1 && + "Exit block should have been split to have one successor!"); + BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); + + // If the successor of the exit block had PHI nodes, add an entry for + // NewExit. + PHINode *PN; + for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) { + PN = cast<PHINode>(I); + Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); + ValueToValueMapTy::iterator It = VMap.find(V); + if (It != VMap.end()) V = It->second; + PN->addIncoming(V, NewExit); + } + + if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { + PN = PHINode::Create(LPad->getType(), 0, "", + ExitSucc->getFirstInsertionPt()); + + for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); + I != E; ++I) { + BasicBlock *BB = *I; + LandingPadInst *LPI = BB->getLandingPadInst(); + LPI->replaceAllUsesWith(PN); + PN->addIncoming(LPI, BB); + } + } + } + + // Rewrite the code to refer to itself. + for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) + for (BasicBlock::iterator I = NewBlocks[i]->begin(), + E = NewBlocks[i]->end(); I != E; ++I) + RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); + + // Rewrite the original preheader to select between versions of the loop. + BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator()); + assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && + "Preheader splitting did not work correctly!"); + + // Emit the new branch that selects between the two versions of this loop. + EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR); + LPM->deleteSimpleAnalysisValue(OldBR, L); + OldBR->eraseFromParent(); + + LoopProcessWorklist.push_back(NewLoop); + redoLoop = true; + + // Keep a WeakVH holding onto LIC. 
If the first call to RewriteLoopBody + // deletes the instruction (for example by simplifying a PHI that feeds into + // the condition that we're unswitching on), we don't rewrite the second + // iteration. + WeakVH LICHandle(LIC); + + // Now we rewrite the original code to know that the condition is true and the + // new code to know that the condition is false. + RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); + + // It's possible that simplifying one loop could cause the other to be + // changed to another value or a constant. If its a constant, don't simplify + // it. + if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && + LICHandle && !isa<Constant>(LICHandle)) + RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true); +} + +/// RemoveFromWorklist - Remove all instances of I from the worklist vector +/// specified. +static void RemoveFromWorklist(Instruction *I, + std::vector<Instruction*> &Worklist) { + std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(), + Worklist.end(), I); + while (WI != Worklist.end()) { + unsigned Offset = WI-Worklist.begin(); + Worklist.erase(WI); + WI = std::find(Worklist.begin()+Offset, Worklist.end(), I); + } +} + +/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the +/// program, replacing all uses with V and update the worklist. +static void ReplaceUsesOfWith(Instruction *I, Value *V, + std::vector<Instruction*> &Worklist, + Loop *L, LPPassManager *LPM) { + DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); + + // Add uses to the worklist, which may be dead now. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) + Worklist.push_back(Use); + + // Add users to the worklist which may be simplified now. 
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) + Worklist.push_back(cast<Instruction>(*UI)); + LPM->deleteSimpleAnalysisValue(I, L); + RemoveFromWorklist(I, Worklist); + I->replaceAllUsesWith(V); + I->eraseFromParent(); + ++NumSimplify; +} + +/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop +/// information, and remove any dead successors it has. +/// +void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, + std::vector<Instruction*> &Worklist, + Loop *L) { + if (pred_begin(BB) != pred_end(BB)) { + // This block isn't dead, since an edge to BB was just removed, see if there + // are any easy simplifications we can do now. + if (BasicBlock *Pred = BB->getSinglePredecessor()) { + // If it has one pred, fold phi nodes in BB. + while (isa<PHINode>(BB->begin())) + ReplaceUsesOfWith(BB->begin(), + cast<PHINode>(BB->begin())->getIncomingValue(0), + Worklist, L, LPM); + + // If this is the header of a loop and the only pred is the latch, we now + // have an unreachable loop. + if (Loop *L = LI->getLoopFor(BB)) + if (loopHeader == BB && L->contains(Pred)) { + // Remove the branch from the latch to the header block, this makes + // the header dead, which will make the latch dead (because the header + // dominates the latch). + LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L); + Pred->getTerminator()->eraseFromParent(); + new UnreachableInst(BB->getContext(), Pred); + + // The loop is now broken, remove it from LI. + RemoveLoopFromHierarchy(L); + + // Reprocess the header, which now IS dead. + RemoveBlockIfDead(BB, Worklist, L); + return; + } + + // If pred ends in a uncond branch, add uncond branch to worklist so that + // the two blocks will get merged. 
+ if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) + if (BI->isUnconditional()) + Worklist.push_back(BI); + } + return; + } + + DEBUG(dbgs() << "Nuking dead block: " << *BB); + + // Remove the instructions in the basic block from the worklist. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + RemoveFromWorklist(I, Worklist); + + // Anything that uses the instructions in this basic block should have their + // uses replaced with undefs. + // If I is not void type then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!I->getType()->isVoidTy()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + } + + // If this is the edge to the header block for a loop, remove the loop and + // promote all subloops. + if (Loop *BBLoop = LI->getLoopFor(BB)) { + if (BBLoop->getLoopLatch() == BB) { + RemoveLoopFromHierarchy(BBLoop); + if (currentLoop == BBLoop) { + currentLoop = 0; + redoLoop = false; + } + } + } + + // Remove the block from the loop info, which removes it from any loops it + // was in. + LI->removeBlock(BB); + + + // Remove phi node entries in successors for this block. + TerminatorInst *TI = BB->getTerminator(); + SmallVector<BasicBlock*, 4> Succs; + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + Succs.push_back(TI->getSuccessor(i)); + TI->getSuccessor(i)->removePredecessor(BB); + } + + // Unique the successors, remove anything with multiple uses. + array_pod_sort(Succs.begin(), Succs.end()); + Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end()); + + // Remove the basic block, including all of the instructions contained in it. + LPM->deleteSimpleAnalysisValue(BB, L); + BB->eraseFromParent(); + // Remove successor blocks here that are not dead, so that we know we only + // have dead blocks in this list. Nondead blocks have a way of becoming dead, + // then getting removed before we revisit them, which is badness. 
+ // + for (unsigned i = 0; i != Succs.size(); ++i) + if (pred_begin(Succs[i]) != pred_end(Succs[i])) { + // One exception is loop headers. If this block was the preheader for a + // loop, then we DO want to visit the loop so the loop gets deleted. + // We know that if the successor is a loop header, that this loop had to + // be the preheader: the case where this was the latch block was handled + // above and headers can only have two predecessors. + if (!LI->isLoopHeader(Succs[i])) { + Succs.erase(Succs.begin()+i); + --i; + } + } + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + RemoveBlockIfDead(Succs[i], Worklist, L); +} + +/// RemoveLoopFromHierarchy - We have discovered that the specified loop has +/// become unwrapped, either because the backedge was deleted, or because the +/// edge into the header was removed. If the edge into the header from the +/// latch block was removed, the loop is unwrapped but subloops are still alive, +/// so they just reparent loops. If the loops are actually dead, they will be +/// removed later. +void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) { + LPM->deleteLoopFromQueue(L); + RemoveLoopFromWorklist(L); +} + +// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has +// the value specified by Val in the specified loop, or we know it does NOT have +// that value. Rewrite any uses of LIC or of properties correlated to it. +void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, + Constant *Val, + bool IsEqual) { + assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); + + // FIXME: Support correlated properties, like: + // for (...) + // if (li1 < li2) + // ... + // if (li1 > li2) + // ... + + // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, + // selects, switches. 
+ std::vector<Instruction*> Worklist; + LLVMContext &Context = Val->getContext(); + + + // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC + // in the loop with the appropriate one directly. + if (IsEqual || (isa<ConstantInt>(Val) && + Val->getType()->isIntegerTy(1))) { + Value *Replacement; + if (IsEqual) + Replacement = Val; + else + Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), + !cast<ConstantInt>(Val)->getZExtValue()); + + for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); + UI != E; ++UI) { + Instruction *U = dyn_cast<Instruction>(*UI); + if (!U || !L->contains(U)) + continue; + U->replaceUsesOfWith(LIC, Replacement); + Worklist.push_back(U); + } + SimplifyCode(Worklist, L); + return; + } + + // Otherwise, we don't know the precise value of LIC, but we do know that it + // is certainly NOT "Val". As such, simplify any uses in the loop that we + // can. This case occurs when we unswitch switch statements. + for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); + UI != E; ++UI) { + Instruction *U = dyn_cast<Instruction>(*UI); + if (!U || !L->contains(U)) + continue; + + Worklist.push_back(U); + + // TODO: We could do other simplifications, for example, turning + // 'icmp eq LIC, Val' -> false. + + // If we know that LIC is not Val, use this info to simplify code. + SwitchInst *SI = dyn_cast<SwitchInst>(U); + if (SI == 0 || !isa<ConstantInt>(Val)) continue; + + unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val)); + if (DeadCase == 0) continue; // Default case is live for multiple values. + + // Found a dead case value. Don't remove PHI nodes in the + // successor if they become single-entry, those PHI nodes may + // be in the Users list. + + BasicBlock *Switch = SI->getParent(); + BasicBlock *SISucc = SI->getSuccessor(DeadCase); + BasicBlock *Latch = L->getLoopLatch(); + if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. 
+ // If the DeadCase successor dominates the loop latch, then the + // transformation isn't safe since it will delete the sole predecessor edge + // to the latch. + if (Latch && DT->dominates(SISucc, Latch)) + continue; + + // FIXME: This is a hack. We need to keep the successor around + // and hooked up so as to preserve the loop structure, because + // trying to update it is complicated. So instead we preserve the + // loop structure and put the block on a dead code path. + SplitEdge(Switch, SISucc, this); + // Compute the successors instead of relying on the return value + // of SplitEdge, since it may have split the switch successor + // after PHI nodes. + BasicBlock *NewSISucc = SI->getSuccessor(DeadCase); + BasicBlock *OldSISucc = *succ_begin(NewSISucc); + // Create an "unreachable" destination. + BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", + Switch->getParent(), + OldSISucc); + new UnreachableInst(Context, Abort); + // Force the new case destination to branch to the "unreachable" + // block while maintaining a (dead) CFG edge to the old block. + NewSISucc->getTerminator()->eraseFromParent(); + BranchInst::Create(Abort, OldSISucc, + ConstantInt::getTrue(Context), NewSISucc); + // Release the PHI operands for this edge. + for (BasicBlock::iterator II = NewSISucc->begin(); + PHINode *PN = dyn_cast<PHINode>(II); ++II) + PN->setIncomingValue(PN->getBasicBlockIndex(Switch), + UndefValue::get(PN->getType())); + // Tell the domtree about the new block. We don't fully update the + // domtree here -- instead we force it to do a full recomputation + // after the pass is complete -- but we do need to inform it of + // new blocks. + if (DT) + DT->addNewBlock(Abort, NewSISucc); + } + + SimplifyCode(Worklist, L); +} + +/// SimplifyCode - Okay, now that we have simplified some instructions in the +/// loop, walk over it and constant prop, dce, and fold control flow where +/// possible. 
Note that this is effectively a very simple loop-structure-aware +/// optimizer. During processing of this loop, L could very well be deleted, so +/// it must not be used. +/// +/// FIXME: When the loop optimizer is more mature, separate this out to a new +/// pass. +/// +void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { + while (!Worklist.empty()) { + Instruction *I = Worklist.back(); + Worklist.pop_back(); + + // Simple DCE. + if (isInstructionTriviallyDead(I)) { + DEBUG(dbgs() << "Remove dead instruction '" << *I); + + // Add uses to the worklist, which may be dead now. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) + Worklist.push_back(Use); + LPM->deleteSimpleAnalysisValue(I, L); + RemoveFromWorklist(I, Worklist); + I->eraseFromParent(); + ++NumSimplify; + continue; + } + + // See if instruction simplification can hack this up. This is common for + // things like "select false, X, Y" after unswitching made the condition be + // 'false'. + if (Value *V = SimplifyInstruction(I, 0, DT)) + if (LI->replacementPreservesLCSSAForm(I, V)) { + ReplaceUsesOfWith(I, V, Worklist, L, LPM); + continue; + } + + // Special case hacks that appear commonly in unswitched code. + if (BranchInst *BI = dyn_cast<BranchInst>(I)) { + if (BI->isUnconditional()) { + // If BI's parent is the only pred of the successor, fold the two blocks + // together. + BasicBlock *Pred = BI->getParent(); + BasicBlock *Succ = BI->getSuccessor(0); + BasicBlock *SinglePred = Succ->getSinglePredecessor(); + if (!SinglePred) continue; // Nothing to do. + assert(SinglePred == Pred && "CFG broken"); + + DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " + << Succ->getName() << "\n"); + + // Resolve any single entry PHI nodes in Succ. 
+ while (PHINode *PN = dyn_cast<PHINode>(Succ->begin())) + ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM); + + // If Succ has any successors with PHI nodes, update them to have + // entries coming from Pred instead of Succ. + Succ->replaceAllUsesWith(Pred); + + // Move all of the successor contents from Succ to Pred. + Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(), + Succ->end()); + LPM->deleteSimpleAnalysisValue(BI, L); + BI->eraseFromParent(); + RemoveFromWorklist(BI, Worklist); + + // Remove Succ from the loop tree. + LI->removeBlock(Succ); + LPM->deleteSimpleAnalysisValue(Succ, L); + Succ->eraseFromParent(); + ++NumSimplify; + continue; + } + + if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){ + // Conditional branch. Turn it into an unconditional branch, then + // remove dead blocks. + continue; // FIXME: Enable. + + DEBUG(dbgs() << "Folded branch: " << *BI); + BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); + BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue()); + DeadSucc->removePredecessor(BI->getParent(), true); + Worklist.push_back(BranchInst::Create(LiveSucc, BI)); + LPM->deleteSimpleAnalysisValue(BI, L); + BI->eraseFromParent(); + RemoveFromWorklist(BI, Worklist); + ++NumSimplify; + + RemoveBlockIfDead(DeadSucc, Worklist, L); + } + continue; + } + } +}
diff --git a/src/LLVM/lib/Transforms/Scalar/LowerAtomic.cpp b/src/LLVM/lib/Transforms/Scalar/LowerAtomic.cpp new file mode 100644 index 0000000..689bbe9 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -0,0 +1,162 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loweratomic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+using namespace llvm;
+
+/// LowerAtomicCmpXchgInst - Replace a cmpxchg with a plain load of the pointer,
+/// an equality compare against the expected value, a select of the new or the
+/// loaded value, and a store back. Uses of the cmpxchg are rewritten to the
+/// loaded value and the cmpxchg is erased. Always returns true.
+static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
+  IRBuilder<> Builder(CXI->getParent(), CXI);
+  Value *Ptr = CXI->getPointerOperand();
+  Value *Cmp = CXI->getCompareOperand();
+  Value *Val = CXI->getNewValOperand();
+
+  LoadInst *Orig = Builder.CreateLoad(Ptr);
+  Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+  Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+  Builder.CreateStore(Res, Ptr);
+
+  CXI->replaceAllUsesWith(Orig);
+  CXI->eraseFromParent();
+  return true;
+}
+
+/// LowerAtomicRMWInst - Replace an atomicrmw with a plain load of the pointer,
+/// the non-atomic computation selected by the RMW operation, and a store of
+/// the result back to the pointer. Uses of the atomicrmw are rewritten to the
+/// loaded value and the atomicrmw is erased. Always returns true.
+static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+  IRBuilder<> Builder(RMWI->getParent(), RMWI);
+  Value *Ptr = RMWI->getPointerOperand();
+  Value *Val = RMWI->getValOperand();
+
+  LoadInst *Orig = Builder.CreateLoad(Ptr);
+  Value *Res = NULL;
+
+  switch (RMWI->getOperation()) {
+  default: llvm_unreachable("Unexpected RMW operation");
+  case AtomicRMWInst::Xchg:
+    Res = Val;
+    break;
+  case AtomicRMWInst::Add:
+    Res = Builder.CreateAdd(Orig, Val);
+    break;
+  case AtomicRMWInst::Sub:
+    Res = Builder.CreateSub(Orig, Val);
+    break;
+  case AtomicRMWInst::And:
+    Res = Builder.CreateAnd(Orig, Val);
+    break;
+  case AtomicRMWInst::Nand:
+    Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
+    break;
+  case AtomicRMWInst::Or:
+    Res = Builder.CreateOr(Orig, Val);
+    break;
+  case AtomicRMWInst::Xor:
+    Res = Builder.CreateXor(Orig, Val);
+    break;
+  // Max/Min select on a signed compare, UMax/UMin on an unsigned one.
+  case AtomicRMWInst::Max:
+    Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+                               Val, Orig);
+    break;
+  case AtomicRMWInst::Min:
+    Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+                               Orig, Val);
+    break;
+  case AtomicRMWInst::UMax:
+    Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+                               Val, Orig);
+    break;
+  case AtomicRMWInst::UMin:
+    Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+                               Orig, Val);
+    break;
+  }
+  Builder.CreateStore(Res, Ptr);
+  RMWI->replaceAllUsesWith(Orig);
+  RMWI->eraseFromParent();
+  return true;
+}
+
+/// LowerFenceInst - Erase the fence from its block. Always returns true.
+static bool LowerFenceInst(FenceInst *FI) {
+  FI->eraseFromParent();
+  return true;
+}
+
+/// LowerLoadInst - Mark the load as NotAtomic in place. Always returns true;
+/// the caller is expected to invoke it only for atomic loads.
+static bool LowerLoadInst(LoadInst *LI) {
+  LI->setAtomic(NotAtomic);
+  return true;
+}
+
+/// LowerStoreInst - Mark the store as NotAtomic in place. Always returns true;
+/// the caller is expected to invoke it only for atomic stores.
+static bool LowerStoreInst(StoreInst *SI) {
+  SI->setAtomic(NotAtomic);
+  return true;
+}
+
+namespace {
+  /// LowerAtomic - BasicBlockPass that rewrites every fence, cmpxchg,
+  /// atomicrmw, atomic load and atomic store in a block to non-atomic form.
+  struct LowerAtomic : public BasicBlockPass {
+    static char ID;
+    LowerAtomic() : BasicBlockPass(ID) {
+      initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
+    }
+    /// runOnBasicBlock - Lower every atomic operation in BB. Returns true if
+    /// any instruction was replaced, erased, or had its atomic marking cleared.
+    bool runOnBasicBlock(BasicBlock &BB) {
+      bool Changed = false;
+      for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
+        // Advance the iterator first: the helpers below may erase Inst.
+        Instruction *Inst = DI++;
+        if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+          Changed |= LowerFenceInst(FI);
+        else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
+          Changed |= LowerAtomicCmpXchgInst(CXI);
+        else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst))
+          Changed |= LowerAtomicRMWInst(RMWI);
+        else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+          // Clearing the atomic marking modifies the IR, so it must be reported.
+          if (LI->isAtomic())
+            Changed |= LowerLoadInst(LI);
+        } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+          if (SI->isAtomic())
+            Changed |= LowerStoreInst(SI);
+        }
+      }
+      return Changed;
+    }
+  };
+}
+
+char LowerAtomic::ID = 0;
+INITIALIZE_PASS(LowerAtomic, "loweratomic",
+                "Lower atomic intrinsics to non-atomic form",
+                false, false)
+
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
diff --git a/src/LLVM/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/src/LLVM/lib/Transforms/Scalar/MemCpyOptimizer.cpp new file mode 100644 index 0000000..eeb8931 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -0,0 +1,986 @@ +//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs various transformations related to eliminating memcpy +// calls, or transforming sets of stores into memset's. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "memcpyopt" +#include "llvm/Transforms/Scalar.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Instructions.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include <list> +using namespace llvm; + +STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); +STATISTIC(NumMemSetInfer, "Number of memsets inferred"); +STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); +STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); + +static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, + bool &VariableIdxFound, const TargetData &TD){ + // Skip over the first indices. + gep_type_iterator GTI = gep_type_begin(GEP); + for (unsigned i = 1; i != Idx; ++i, ++GTI) + /*skip along*/; + + // Compute the offset implied by the rest of the indices. 
+ int64_t Offset = 0; + for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { + ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (OpC == 0) + return VariableIdxFound = true; + if (OpC->isZero()) continue; // No offset. + + // Handle struct indices, which add their field offset to the pointer. + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + continue; + } + + // Otherwise, we have a sequential type like an array or vector. Multiply + // the index by the ElementSize. + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size*OpC->getSExtValue(); + } + + return Offset; +} + +/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a +/// constant offset, and return that constant offset. For example, Ptr1 might +/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. +static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, + const TargetData &TD) { + Ptr1 = Ptr1->stripPointerCasts(); + Ptr2 = Ptr2->stripPointerCasts(); + GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1); + GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2); + + bool VariableIdxFound = false; + + // If one pointer is a GEP and the other isn't, then see if the GEP is a + // constant offset from the base, as in "P" and "gep P, 1". + if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { + Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); + return !VariableIdxFound; + } + + if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { + Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); + return !VariableIdxFound; + } + + // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical + // base. After that base, they may have some number of common (and + // potentially variable) indices. 
After that they handle some constant + // offset, which determines their offset from each other. At this point, we + // handle no other case. + if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) + return false; + + // Skip any common indices and track the GEP types. + unsigned Idx = 1; + for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) + if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) + break; + + int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); + int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); + if (VariableIdxFound) return false; + + Offset = Offset2-Offset1; + return true; +} + + +/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value. +/// This allows us to analyze stores like: +/// store 0 -> P+1 +/// store 0 -> P+0 +/// store 0 -> P+3 +/// store 0 -> P+2 +/// which sometimes happens with stores to arrays of structs etc. When we see +/// the first store, we make a range [1, 2). The second store extends the range +/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the +/// two ranges into [0, 3) which is memset'able. +namespace { +struct MemsetRange { + // Start/End - A semi range that describes the span that this range covers. + // The range is closed at the start and open at the end: [Start, End). + int64_t Start, End; + + /// StartPtr - The getelementptr instruction that points to the start of the + /// range. + Value *StartPtr; + + /// Alignment - The known alignment of the first store. + unsigned Alignment; + + /// TheStores - The actual stores that make up this range. + SmallVector<Instruction*, 16> TheStores; + + bool isProfitableToUseMemset(const TargetData &TD) const; + +}; +} // end anon namespace + +bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { + // If we found more than 8 stores to merge or 64 bytes, use memset. 
+ if (TheStores.size() >= 8 || End-Start >= 64) return true; + + // If there is nothing to merge, don't do anything. + if (TheStores.size() < 2) return false; + + // If any of the stores are a memset, then it is always good to extend the + // memset. + for (unsigned i = 0, e = TheStores.size(); i != e; ++i) + if (!isa<StoreInst>(TheStores[i])) + return true; + + // Assume that the code generator is capable of merging pairs of stores + // together if it wants to. + if (TheStores.size() == 2) return false; + + // If we have fewer than 8 stores, it can still be worthwhile to do this. + // For example, merging 4 i8 stores into an i32 store is useful almost always. + // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the + // memset will be split into 2 32-bit stores anyway) and doing so can + // pessimize the llvm optimizer. + // + // Since we don't have perfect knowledge here, make some assumptions: assume + // the maximum GPR width is the same size as the pointer size and assume that + // this width can be stored. If so, check to see whether we will end up + // actually reducing the number of stores used. + unsigned Bytes = unsigned(End-Start); + unsigned NumPointerStores = Bytes/TD.getPointerSize(); + + // Assume the remaining bytes if any are done a byte at a time. + unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(); + + // If we will reduce the # stores (according to this heuristic), do the + // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 + // etc. + return TheStores.size() > NumPointerStores+NumByteStores; +} + + +namespace { +class MemsetRanges { + /// Ranges - A sorted list of the memset ranges. We use std::list here + /// because each element is relatively large and expensive to copy. 
+ std::list<MemsetRange> Ranges; + typedef std::list<MemsetRange>::iterator range_iterator; + const TargetData &TD; +public: + MemsetRanges(const TargetData &td) : TD(td) {} + + typedef std::list<MemsetRange>::const_iterator const_iterator; + const_iterator begin() const { return Ranges.begin(); } + const_iterator end() const { return Ranges.end(); } + bool empty() const { return Ranges.empty(); } + + void addInst(int64_t OffsetFromFirst, Instruction *Inst) { + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + addStore(OffsetFromFirst, SI); + else + addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst)); + } + + void addStore(int64_t OffsetFromFirst, StoreInst *SI) { + int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType()); + + addRange(OffsetFromFirst, StoreSize, + SI->getPointerOperand(), SI->getAlignment(), SI); + } + + void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { + int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); + addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); + } + + void addRange(int64_t Start, int64_t Size, Value *Ptr, + unsigned Alignment, Instruction *Inst); + +}; + +} // end anon namespace + + +/// addRange - Add a new store to the MemsetRanges data structure. This adds a +/// new range for the specified store at the specified offset, merging into +/// existing ranges as appropriate. +/// +/// Do a linear search of the ranges to see if this can be joined and/or to +/// find the insertion point in the list. We keep the ranges sorted for +/// simplicity here. This is a linear search of a linked list, which is ugly, +/// however the number of ranges is limited, so this won't get crazy slow. 
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, + unsigned Alignment, Instruction *Inst) { + int64_t End = Start+Size; + range_iterator I = Ranges.begin(), E = Ranges.end(); + + while (I != E && Start > I->End) + ++I; + + // We now know that I == E, in which case we didn't find anything to merge + // with, or that Start <= I->End. If End < I->Start or I == E, then we need + // to insert a new range. Handle this now. + if (I == E || End < I->Start) { + MemsetRange &R = *Ranges.insert(I, MemsetRange()); + R.Start = Start; + R.End = End; + R.StartPtr = Ptr; + R.Alignment = Alignment; + R.TheStores.push_back(Inst); + return; + } + + // This store overlaps with I, add it. + I->TheStores.push_back(Inst); + + // At this point, we may have an interval that completely contains our store. + // If so, just add it to the interval and return. + if (I->Start <= Start && I->End >= End) + return; + + // Now we know that Start <= I->End and End >= I->Start so the range overlaps + // but is not entirely contained within the range. + + // See if the range extends the start of the range. In this case, it couldn't + // possibly cause it to join the prior range, because otherwise we would have + // stopped on *it*. + if (Start < I->Start) { + I->Start = Start; + I->StartPtr = Ptr; + I->Alignment = Alignment; + } + + // Now we know that Start <= I->End and Start >= I->Start (so the startpoint + // is in or right at the end of I), and that End >= I->Start. Extend I out to + // End. + if (End > I->End) { + I->End = End; + range_iterator NextI = I; + while (++NextI != E && End >= NextI->Start) { + // Merge the range in. 
+ I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end()); + if (NextI->End > I->End) + I->End = NextI->End; + Ranges.erase(NextI); + NextI = I; + } + } +} + +//===----------------------------------------------------------------------===// +// MemCpyOpt Pass +//===----------------------------------------------------------------------===// + +namespace { + class MemCpyOpt : public FunctionPass { + MemoryDependenceAnalysis *MD; + TargetLibraryInfo *TLI; + const TargetData *TD; + public: + static char ID; // Pass identification, replacement for typeid + MemCpyOpt() : FunctionPass(ID) { + initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); + MD = 0; + TLI = 0; + TD = 0; + } + + bool runOnFunction(Function &F); + + private: + // This transformation requires dominator postdominator info + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<DominatorTree>(); + AU.addRequired<MemoryDependenceAnalysis>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<MemoryDependenceAnalysis>(); + } + + // Helper functions + bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); + bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); + bool processMemCpy(MemCpyInst *M); + bool processMemMove(MemMoveInst *M); + bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc, + uint64_t cpyLen, CallInst *C); + bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, + uint64_t MSize); + bool processByValArgument(CallSite CS, unsigned ArgNo); + Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, + Value *ByteVal); + + bool iterateOnFunction(Function &F); + }; + + char MemCpyOpt::ID = 0; +} + +// createMemCpyOptPass - The public interface to this file... 
+FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } + +INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", + false, false) + +/// tryMergingIntoMemset - When scanning forward over instructions, we look for +/// some other patterns to fold away. In particular, this looks for stores to +/// neighboring locations of memory. If it sees enough consecutive ones, it +/// attempts to merge them together into a memcpy/memset. +Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, + Value *StartPtr, Value *ByteVal) { + if (TD == 0) return 0; + + // Okay, so we now have a single store that can be splatable. Scan to find + // all subsequent stores of the same value to offset from the same pointer. + // Join these together into ranges, so we can decide whether contiguous blocks + // are stored. + MemsetRanges Ranges(*TD); + + BasicBlock::iterator BI = StartInst; + for (++BI; !isa<TerminatorInst>(BI); ++BI) { + if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { + // If the instruction is readnone, ignore it, otherwise bail out. We + // don't even allow readonly here because we don't want something like: + // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). + if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) + break; + continue; + } + + if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { + // If this is a store, see if we can merge it in. + if (!NextStore->isSimple()) break; + + // Check to see if this stored value is of the same byte-splattable value. + if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) + break; + + // Check to see if this store is to a constant offset from the start ptr. 
+ int64_t Offset; + if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), + Offset, *TD)) + break; + + Ranges.addStore(Offset, NextStore); + } else { + MemSetInst *MSI = cast<MemSetInst>(BI); + + if (MSI->isVolatile() || ByteVal != MSI->getValue() || + !isa<ConstantInt>(MSI->getLength())) + break; + + // Check to see if this store is to a constant offset from the start ptr. + int64_t Offset; + if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD)) + break; + + Ranges.addMemSet(Offset, MSI); + } + } + + // If we have no ranges, then we just had a single store with nothing that + // could be merged in. This is a very common case of course. + if (Ranges.empty()) + return 0; + + // If we had at least one store that could be merged in, add the starting + // store as well. We try to avoid this unless there is at least something + // interesting as a small compile-time optimization. + Ranges.addInst(0, StartInst); + + // If we create any memsets, we put it right before the first instruction that + // isn't part of the memset block. This ensures that the memset is dominated + // by any addressing instruction needed by the start of the block. + IRBuilder<> Builder(BI); + + // Now that we have full information about ranges, loop over the ranges and + // emit memset's for anything big enough to be worthwhile. + Instruction *AMemSet = 0; + for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); + I != E; ++I) { + const MemsetRange &Range = *I; + + if (Range.TheStores.size() == 1) continue; + + // If it is profitable to lower this range to memset, do so now. + if (!Range.isProfitableToUseMemset(*TD)) + continue; + + // Otherwise, we do want to transform this! Create a new memset. + // Get the starting pointer of the block. 
+ StartPtr = Range.StartPtr; + + // Determine alignment + unsigned Alignment = Range.Alignment; + if (Alignment == 0) { + Type *EltType = + cast<PointerType>(StartPtr->getType())->getElementType(); + Alignment = TD->getABITypeAlignment(EltType); + } + + AMemSet = + Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); + + DEBUG(dbgs() << "Replace stores:\n"; + for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) + dbgs() << *Range.TheStores[i] << '\n'; + dbgs() << "With: " << *AMemSet << '\n'); + + if (!Range.TheStores.empty()) + AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); + + // Zap all the stores. + for (SmallVector<Instruction*, 16>::const_iterator + SI = Range.TheStores.begin(), + SE = Range.TheStores.end(); SI != SE; ++SI) { + MD->removeInstruction(*SI); + (*SI)->eraseFromParent(); + } + ++NumMemSetInfer; + } + + return AMemSet; +} + + +bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { + if (!SI->isSimple()) return false; + + if (TD == 0) return false; + + // Detect cases where we're performing call slot forwarding, but + // happen to be using a load-store pair to implement it, rather than + // a memcpy. + if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { + if (LI->isSimple() && LI->hasOneUse() && + LI->getParent() == SI->getParent()) { + MemDepResult ldep = MD->getDependency(LI); + CallInst *C = 0; + if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) + C = dyn_cast<CallInst>(ldep.getInst()); + + if (C) { + // Check that nothing touches the dest of the "copy" between + // the call and the store. 
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + AliasAnalysis::Location StoreLoc = AA.getLocation(SI); + for (BasicBlock::iterator I = --BasicBlock::iterator(SI), + E = C; I != E; --I) { + if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { + C = 0; + break; + } + } + } + + if (C) { + bool changed = performCallSlotOptzn(LI, + SI->getPointerOperand()->stripPointerCasts(), + LI->getPointerOperand()->stripPointerCasts(), + TD->getTypeStoreSize(SI->getOperand(0)->getType()), C); + if (changed) { + MD->removeInstruction(SI); + SI->eraseFromParent(); + MD->removeInstruction(LI); + LI->eraseFromParent(); + ++NumMemCpyInstr; + return true; + } + } + } + } + + // There are two cases that are interesting for this code to handle: memcpy + // and memset. Right now we only handle memset. + + // Ensure that the value being stored is something that can be memset'able a + // byte at a time like "0" or "-1" or any width, as well as things like + // 0xA0A0A0A0 and 0.0. + if (Value *ByteVal = isBytewiseValue(SI->getOperand(0))) + if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(), + ByteVal)) { + BBI = I; // Don't invalidate iterator. + return true; + } + + return false; +} + +bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { + // See if there is another memset or store neighboring this memset which + // allows us to widen out the memset to do a single larger store. + if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile()) + if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(), + MSI->getValue())) { + BBI = I; // Don't invalidate iterator. + return true; + } + return false; +} + + +/// performCallSlotOptzn - takes a memcpy and a call that it depends on, +/// and checks for the possibility of a call slot optimization by having +/// the call write its result directly into the destination of the memcpy. 
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, + Value *cpyDest, Value *cpySrc, + uint64_t cpyLen, CallInst *C) { + // The general transformation to keep in mind is + // + // call @func(..., src, ...) + // memcpy(dest, src, ...) + // + // -> + // + // memcpy(dest, src, ...) + // call @func(..., dest, ...) + // + // Since moving the memcpy is technically awkward, we additionally check that + // src only holds uninitialized values at the moment of the call, meaning that + // the memcpy can be discarded rather than moved. + + // Deliberately get the source and destination with bitcasts stripped away, + // because we'll need to do type comparisons based on the underlying type. + CallSite CS(C); + + // Require that src be an alloca. This simplifies the reasoning considerably. + AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); + if (!srcAlloca) + return false; + + // Check that all of src is copied to dest. + if (TD == 0) return false; + + ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); + if (!srcArraySize) + return false; + + uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) * + srcArraySize->getZExtValue(); + + if (cpyLen < srcSize) + return false; + + // Check that accessing the first srcSize bytes of dest will not cause a + // trap. Otherwise the transform is invalid since it might cause a trap + // to occur earlier than it otherwise would. + if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { + // The destination is an alloca. Check it is larger than srcSize. + ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); + if (!destArraySize) + return false; + + uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) * + destArraySize->getZExtValue(); + + if (destSize < srcSize) + return false; + } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { + // If the destination is an sret parameter then only accesses that are + // outside of the returned struct type can trap. 
+ if (!A->hasStructRetAttr()) + return false; + + Type *StructTy = cast<PointerType>(A->getType())->getElementType(); + uint64_t destSize = TD->getTypeAllocSize(StructTy); + + if (destSize < srcSize) + return false; + } else { + return false; + } + + // Check that src is not accessed except via the call and the memcpy. This + // guarantees that it holds only undefined values when passed in (so the final + // memcpy can be dropped), that it is not read or written between the call and + // the memcpy, and that writing beyond the end of it is undefined. + SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(), + srcAlloca->use_end()); + while (!srcUseList.empty()) { + User *UI = srcUseList.pop_back_val(); + + if (isa<BitCastInst>(UI)) { + for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); + I != E; ++I) + srcUseList.push_back(*I); + } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) { + if (G->hasAllZeroIndices()) + for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); + I != E; ++I) + srcUseList.push_back(*I); + else + return false; + } else if (UI != C && UI != cpy) { + return false; + } + } + + // Since we're changing the parameter to the callsite, we need to make sure + // that what would be the new parameter dominates the callsite. + DominatorTree &DT = getAnalysis<DominatorTree>(); + if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) + if (!DT.dominates(cpyDestInst, C)) + return false; + + // In addition to knowing that the call does not access src in some + // unexpected manner, for example via a global, which we deduce from + // the use analysis, we also need to know that it does not sneakily + // access dest. We rely on AA to figure this out for us. + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef) + return false; + + // All the checks have passed, so do the transformation. 
+ bool changedArgument = false; + for (unsigned i = 0; i < CS.arg_size(); ++i) + if (CS.getArgument(i)->stripPointerCasts() == cpySrc) { + if (cpySrc->getType() != cpyDest->getType()) + cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), + cpyDest->getName(), C); + changedArgument = true; + if (CS.getArgument(i)->getType() == cpyDest->getType()) + CS.setArgument(i, cpyDest); + else + CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, + CS.getArgument(i)->getType(), cpyDest->getName(), C)); + } + + if (!changedArgument) + return false; + + // Drop any cached information about the call, because we may have changed + // its dependence information by changing its parameter. + MD->removeInstruction(C); + + // Remove the memcpy. + MD->removeInstruction(cpy); + ++NumMemCpyInstr; + + return true; +} + +/// processMemCpyMemCpyDependence - We've found that the (upward scanning) +/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to +/// copy from MDep's input if we can. MSize is the size of M's copy. +/// +bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, + uint64_t MSize) { + // We can only transform memcpy's where the dest of one is the source of the + // other. + if (M->getSource() != MDep->getDest() || MDep->isVolatile()) + return false; + + // If dep instruction is reading from our current input, then it is a noop + // transfer and substituting the input won't change this instruction. Just + // ignore the input and let someone else zap MDep. This handles cases like: + // memcpy(a <- a) + // memcpy(b <- a) + if (M->getSource() == MDep->getSource()) + return false; + + // Second, the length of the memcpy's must be the same, or the preceding one + // must be larger than the following one. 
+ ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); + ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); + if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) + return false; + + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + + // Verify that the copied-from memory doesn't change in between the two + // transfers. For example, in: + // memcpy(a <- b) + // *b = 42; + // memcpy(c <- a) + // It would be invalid to transform the second memcpy into memcpy(c <- b). + // + // TODO: If the code between M and MDep is transparent to the destination "c", + // then we could still perform the xform by moving M up to the first memcpy. + // + // NOTE: This is conservative, it will stop on any read from the source loc, + // not just the defining memcpy. + MemDepResult SourceDep = + MD->getPointerDependencyFrom(AA.getLocationForSource(MDep), + false, M, M->getParent()); + if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) + return false; + + // If the dest of the second might alias the source of the first, then the + // source and dest might overlap. We still want to eliminate the intermediate + // value, but we have to generate a memmove instead of memcpy. + bool UseMemMove = false; + if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep))) + UseMemMove = true; + + // If all checks passed, then we can transform M. + + // Make sure to use the lesser of the alignment of the source and the dest + // since we're changing where we're reading from, but don't want to increase + // the alignment past what can be read from or written to. + // TODO: Is this worth it if we're creating a less aligned memcpy? For + // example we could be moving from movaps -> movq on x86. 
+ unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); + + IRBuilder<> Builder(M); + if (UseMemMove) + Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), + Align, M->isVolatile()); + else + Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), + Align, M->isVolatile()); + + // Remove the instruction we're replacing. + MD->removeInstruction(M); + M->eraseFromParent(); + ++NumMemCpyInstr; + return true; +} + + +/// processMemCpy - perform simplification of memcpy's. If we have memcpy A +/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite +/// B to be a memcpy from X to Z (or potentially a memmove, depending on +/// circumstances). This allows later passes to remove the first memcpy +/// altogether. +bool MemCpyOpt::processMemCpy(MemCpyInst *M) { + // We can only optimize statically-sized memcpy's that are non-volatile. + ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); + if (CopySize == 0 || M->isVolatile()) return false; + + // If the source and destination of the memcpy are the same, then zap it. + if (M->getSource() == M->getDest()) { + MD->removeInstruction(M); + M->eraseFromParent(); + return false; + } + + // If copying from a constant, try to turn the memcpy into a memset. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource())) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { + IRBuilder<> Builder(M); + Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize, + M->getAlignment(), false); + MD->removeInstruction(M); + M->eraseFromParent(); + ++NumCpyToSet; + return true; + } + + // There are two possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundance for DSE. + // b) call-memcpy xform for return slot optimization. 
+ MemDepResult DepInfo = MD->getDependency(M); + if (!DepInfo.isClobber()) + return false; + + if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst())) + return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue()); + + if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { + if (performCallSlotOptzn(M, M->getDest(), M->getSource(), + CopySize->getZExtValue(), C)) { + MD->removeInstruction(M); + M->eraseFromParent(); + return true; + } + } + + return false; +} + +/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst +/// are guaranteed not to alias. +bool MemCpyOpt::processMemMove(MemMoveInst *M) { + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + + if (!TLI->has(LibFunc::memmove)) + return false; + + // See if the pointers alias. + if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) + return false; + + DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); + + // If not, then we know we can transform this. + Module *Mod = M->getParent()->getParent()->getParent(); + Type *ArgTys[3] = { M->getRawDest()->getType(), + M->getRawSource()->getType(), + M->getLength()->getType() }; + M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, + ArgTys)); + + // MemDep may have over conservative information about this instruction, just + // conservatively flush it from the cache. + MD->removeInstruction(M); + + ++NumMoveToCpy; + return true; +} + +/// processByValArgument - This is called on every byval argument in call sites. +bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { + if (TD == 0) return false; + + // Find out what feeds this byval argument. 
+ Value *ByValArg = CS.getArgument(ArgNo); + Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); + uint64_t ByValSize = TD->getTypeAllocSize(ByValTy); + MemDepResult DepInfo = + MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), + true, CS.getInstruction(), + CS.getInstruction()->getParent()); + if (!DepInfo.isClobber()) + return false; + + // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by + // a memcpy, see if we can byval from the source of the memcpy instead of the + // result. + MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); + if (MDep == 0 || MDep->isVolatile() || + ByValArg->stripPointerCasts() != MDep->getDest()) + return false; + + // The length of the memcpy must be larger or equal to the size of the byval. + ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); + if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) + return false; + + // Get the alignment of the byval. If the call doesn't specify the alignment, + // then it is some target specific value that we can't know. + unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); + if (ByValAlign == 0) return false; + + // If it is greater than the memcpy, then we check to see if we can force the + // source of the memcpy to the alignment we need. If we fail, we bail out. + if (MDep->getAlignment() < ByValAlign && + getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign) + return false; + + // Verify that the copied-from memory doesn't change in between the memcpy and + // the byval call. + // memcpy(a <- b) + // *b = 42; + // foo(*a) + // It would be invalid to transform the second memcpy into foo(*b). + // + // NOTE: This is conservative, it will stop on any read from the source loc, + // not just the defining memcpy. 
+ MemDepResult SourceDep = + MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep), + false, CS.getInstruction(), MDep->getParent()); + if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) + return false; + + Value *TmpCast = MDep->getSource(); + if (MDep->getSource()->getType() != ByValArg->getType()) + TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), + "tmpcast", CS.getInstruction()); + + DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n" + << " " << *MDep << "\n" + << " " << *CS.getInstruction() << "\n"); + + // Otherwise we're good! Update the byval argument. + CS.setArgument(ArgNo, TmpCast); + ++NumMemCpyInstr; + return true; +} + +/// iterateOnFunction - Executes one iteration of MemCpyOpt. +bool MemCpyOpt::iterateOnFunction(Function &F) { + bool MadeChange = false; + + // Walk all instruction in the function. + for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { + // Avoid invalidating the iterator. + Instruction *I = BI++; + + bool RepeatInstruction = false; + + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + MadeChange |= processStore(SI, BI); + else if (MemSetInst *M = dyn_cast<MemSetInst>(I)) + RepeatInstruction = processMemSet(M, BI); + else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) + RepeatInstruction = processMemCpy(M); + else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) + RepeatInstruction = processMemMove(M); + else if (CallSite CS = (Value*)I) { + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.paramHasAttr(i+1, Attribute::ByVal)) + MadeChange |= processByValArgument(CS, i); + } + + // Reprocess the instruction if desired. + if (RepeatInstruction) { + if (BI != BB->begin()) --BI; + MadeChange = true; + } + } + } + + return MadeChange; +} + +// MemCpyOpt::runOnFunction - This is the main transformation entry point for a +// function. 
+// +bool MemCpyOpt::runOnFunction(Function &F) { + bool MadeChange = false; + MD = &getAnalysis<MemoryDependenceAnalysis>(); + TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + + // If we don't have at least memset and memcpy, there is little point of doing + // anything here. These are required by a freestanding implementation, so if + // even they are disabled, there is no point in trying hard. + if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) + return false; + + while (1) { + if (!iterateOnFunction(F)) + break; + MadeChange = true; + } + + MD = 0; + return MadeChange; +}
diff --git a/src/LLVM/lib/Transforms/Scalar/ObjCARC.cpp b/src/LLVM/lib/Transforms/Scalar/ObjCARC.cpp new file mode 100644 index 0000000..da74e9c --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/ObjCARC.cpp
@@ -0,0 +1,3717 @@ +//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines ObjC ARC optimizations. ARC stands for +// Automatic Reference Counting and is a system for managing reference counts +// for objects in Objective C. +// +// The optimizations performed include elimination of redundant, partially +// redundant, and inconsequential reference count operations, elimination of +// redundant weak pointer operations, pattern-matching and replacement of +// low-level operations into higher-level operations, and numerous minor +// simplifications. +// +// This file also defines a simple ARC-aware AliasAnalysis. +// +// WARNING: This file knows about certain library functions. It recognizes them +// by name, and hardwires knowledge of their semantics. +// +// WARNING: This file knows about how certain Objective-C library functions are +// used. Naive LLVM IR transformations which would otherwise be +// behavior-preserving may break these assumptions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/GlobalVariable.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +// A handy option to enable/disable all optimizations in this file.
+static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true)); + +//===----------------------------------------------------------------------===// +// Misc. Utilities +//===----------------------------------------------------------------------===// + +namespace { + /// MapVector - An associative container with fast insertion-order + /// (deterministic) iteration over its elements. Plus the special + /// blot operation. + template<class KeyT, class ValueT> + class MapVector { + /// Map - Map keys to indices in Vector. + typedef DenseMap<KeyT, size_t> MapTy; + MapTy Map; + + /// Vector - Keys and values. + typedef std::vector<std::pair<KeyT, ValueT> > VectorTy; + VectorTy Vector; + + public: + typedef typename VectorTy::iterator iterator; + typedef typename VectorTy::const_iterator const_iterator; + iterator begin() { return Vector.begin(); } + iterator end() { return Vector.end(); } + const_iterator begin() const { return Vector.begin(); } + const_iterator end() const { return Vector.end(); } + +#ifdef XDEBUG + ~MapVector() { + assert(Vector.size() >= Map.size()); // May differ due to blotting. 
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); + I != E; ++I) { + assert(I->second < Vector.size()); + assert(Vector[I->second].first == I->first); + } + for (typename VectorTy::const_iterator I = Vector.begin(), + E = Vector.end(); I != E; ++I) + assert(!I->first || + (Map.count(I->first) && + Map[I->first] == size_t(I - Vector.begin()))); + } +#endif + + ValueT &operator[](KeyT Arg) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(Arg, size_t(0))); + if (Pair.second) { + Pair.first->second = Vector.size(); + Vector.push_back(std::make_pair(Arg, ValueT())); + return Vector.back().second; + } + return Vector[Pair.first->second].second; + } + + std::pair<iterator, bool> + insert(const std::pair<KeyT, ValueT> &InsertPair) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(InsertPair.first, size_t(0))); + if (Pair.second) { + Pair.first->second = Vector.size(); + Vector.push_back(InsertPair); + return std::make_pair(llvm::prior(Vector.end()), true); + } + return std::make_pair(Vector.begin() + Pair.first->second, false); + } + + const_iterator find(KeyT Key) const { + typename MapTy::const_iterator It = Map.find(Key); + if (It == Map.end()) return Vector.end(); + return Vector.begin() + It->second; + } + + /// blot - This is similar to erase, but instead of removing the element + /// from the vector, it just zeros out the key in the vector. This leaves + /// iterators intact, but clients must be prepared for zeroed-out keys when + /// iterating. + void blot(KeyT Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) return; + Vector[It->second].first = KeyT(); + Map.erase(It); + } + + void clear() { + Map.clear(); + Vector.clear(); + } + }; +} + +//===----------------------------------------------------------------------===// +// ARC Utilities. 
+//===----------------------------------------------------------------------===// + +namespace { + /// InstructionClass - A simple classification for instructions. + enum InstructionClass { + IC_Retain, ///< objc_retain + IC_RetainRV, ///< objc_retainAutoreleasedReturnValue + IC_RetainBlock, ///< objc_retainBlock + IC_Release, ///< objc_release + IC_Autorelease, ///< objc_autorelease + IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue + IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush + IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop + IC_NoopCast, ///< objc_retainedObject, etc. + IC_FusedRetainAutorelease, ///< objc_retainAutorelease + IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue + IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive) + IC_StoreWeak, ///< objc_storeWeak (primitive) + IC_InitWeak, ///< objc_initWeak (derived) + IC_LoadWeak, ///< objc_loadWeak (derived) + IC_MoveWeak, ///< objc_moveWeak (derived) + IC_CopyWeak, ///< objc_copyWeak (derived) + IC_DestroyWeak, ///< objc_destroyWeak (derived) + IC_CallOrUser, ///< could call objc_release and/or "use" pointers + IC_Call, ///< could call objc_release + IC_User, ///< could "use" a pointer + IC_None ///< anything else + }; +} + +/// IsPotentialUse - Test whether the given value is possible a +/// reference-counted pointer. +static bool IsPotentialUse(const Value *Op) { + // Pointers to static or stack storage are not reference-counted pointers. + if (isa<Constant>(Op) || isa<AllocaInst>(Op)) + return false; + // Special arguments are not reference-counted. + if (const Argument *Arg = dyn_cast<Argument>(Op)) + if (Arg->hasByValAttr() || + Arg->hasNestAttr() || + Arg->hasStructRetAttr()) + return false; + // Only consider values with pointer types, and not function pointers. + PointerType *Ty = dyn_cast<PointerType>(Op->getType()); + if (!Ty || isa<FunctionType>(Ty->getElementType())) + return false; + // Conservatively assume anything else is a potential use. 
+ return true; +} + +/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind +/// of construct CS is. +static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) + if (IsPotentialUse(*I)) + return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser; + + return CS.onlyReadsMemory() ? IC_None : IC_Call; +} + +/// GetFunctionClass - Determine if F is one of the special known Functions. +/// If it isn't, return IC_CallOrUser. +static InstructionClass GetFunctionClass(const Function *F) { + Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + + // No arguments. + if (AI == AE) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) + .Default(IC_CallOrUser); + + // One argument. + const Argument *A0 = AI++; + if (AI == AE) + // Argument is a pointer. + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { + Type *ETy = PTy->getElementType(); + // Argument is i8*. 
+ if (ETy->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_retain", IC_Retain) + .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) + .Case("objc_retainBlock", IC_RetainBlock) + .Case("objc_release", IC_Release) + .Case("objc_autorelease", IC_Autorelease) + .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) + .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) + .Case("objc_retainedObject", IC_NoopCast) + .Case("objc_unretainedObject", IC_NoopCast) + .Case("objc_unretainedPointer", IC_NoopCast) + .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) + .Default(IC_CallOrUser); + + // Argument is i8** + if (PointerType *Pte = dyn_cast<PointerType>(ETy)) + if (Pte->getElementType()->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_loadWeakRetained", IC_LoadWeakRetained) + .Case("objc_loadWeak", IC_LoadWeak) + .Case("objc_destroyWeak", IC_DestroyWeak) + .Default(IC_CallOrUser); + } + + // Two arguments, first is i8**. + const Argument *A1 = AI++; + if (AI == AE) + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) + if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) + if (Pte->getElementType()->isIntegerTy(8)) + if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { + Type *ETy1 = PTy1->getElementType(); + // Second argument is i8* + if (ETy1->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_storeWeak", IC_StoreWeak) + .Case("objc_initWeak", IC_InitWeak) + .Default(IC_CallOrUser); + // Second argument is i8**. 
+ if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) + if (Pte1->getElementType()->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_moveWeak", IC_MoveWeak) + .Case("objc_copyWeak", IC_CopyWeak) + .Default(IC_CallOrUser); + } + + // Anything else. + return IC_CallOrUser; +} + +/// GetInstructionClass - Determine what kind of construct V is. +static InstructionClass GetInstructionClass(const Value *V) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Any instruction other than bitcast and gep with a pointer operand have a + // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer + // to a subsequent use, rather than using it themselves, in this sense. + // As a short cut, several other opcodes are known to have no pointer + // operands of interest. And ret is never followed by a release, so it's + // not interesting to examine. + switch (I->getOpcode()) { + case Instruction::Call: { + const CallInst *CI = cast<CallInst>(I); + // Check for calls to special functions. + if (const Function *F = CI->getCalledFunction()) { + InstructionClass Class = GetFunctionClass(F); + if (Class != IC_CallOrUser) + return Class; + + // None of the intrinsic functions do objc_release. For intrinsics, the + // only question is whether or not they may be users. + switch (F->getIntrinsicID()) { + case 0: break; + case Intrinsic::bswap: case Intrinsic::ctpop: + case Intrinsic::ctlz: case Intrinsic::cttz: + case Intrinsic::returnaddress: case Intrinsic::frameaddress: + case Intrinsic::stacksave: case Intrinsic::stackrestore: + case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: + // Don't let dbg info affect our results. + case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + // Short cut: Some intrinsics obviously don't use ObjC pointers. 
+ return IC_None; + default: + for (Function::const_arg_iterator AI = F->arg_begin(), + AE = F->arg_end(); AI != AE; ++AI) + if (IsPotentialUse(AI)) + return IC_User; + return IC_None; + } + } + return GetCallSiteClass(CI); + } + case Instruction::Invoke: + return GetCallSiteClass(cast<InvokeInst>(I)); + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::Select: case Instruction::PHI: + case Instruction::Ret: case Instruction::Br: + case Instruction::Switch: case Instruction::IndirectBr: + case Instruction::Alloca: case Instruction::VAArg: + case Instruction::Add: case Instruction::FAdd: + case Instruction::Sub: case Instruction::FSub: + case Instruction::Mul: case Instruction::FMul: + case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: + case Instruction::SRem: case Instruction::URem: case Instruction::FRem: + case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: + case Instruction::And: case Instruction::Or: case Instruction::Xor: + case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: + case Instruction::IntToPtr: case Instruction::FCmp: + case Instruction::FPTrunc: case Instruction::FPExt: + case Instruction::FPToUI: case Instruction::FPToSI: + case Instruction::UIToFP: case Instruction::SIToFP: + case Instruction::InsertElement: case Instruction::ExtractElement: + case Instruction::ShuffleVector: + case Instruction::ExtractValue: + break; + case Instruction::ICmp: + // Comparing a pointer with null, or any other constant, isn't an + // interesting use, because we don't care what the pointer points to, or + // about the values of any other dynamic reference-counted pointers. + if (IsPotentialUse(I->getOperand(1))) + return IC_User; + break; + default: + // For anything else, check all the operands. 
+ // Note that this includes both operands of a Store: while the first + // operand isn't actually being dereferenced, it is being stored to + // memory where we can no longer track who might read it and dereference + // it, so we have to consider it potentially used. + for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + if (IsPotentialUse(*OI)) + return IC_User; + } + } + + // Otherwise, it's totally inert for ARC purposes. + return IC_None; +} + +/// GetBasicInstructionClass - Determine what kind of construct V is. This is +/// similar to GetInstructionClass except that it only detects objc runtine +/// calls. This allows it to be faster. +static InstructionClass GetBasicInstructionClass(const Value *V) { + if (const CallInst *CI = dyn_cast<CallInst>(V)) { + if (const Function *F = CI->getCalledFunction()) + return GetFunctionClass(F); + // Otherwise, be conservative. + return IC_CallOrUser; + } + + // Otherwise, be conservative. + return IC_User; +} + +/// IsRetain - Test if the the given class is objc_retain or +/// equivalent. +static bool IsRetain(InstructionClass Class) { + return Class == IC_Retain || + Class == IC_RetainRV; +} + +/// IsAutorelease - Test if the the given class is objc_autorelease or +/// equivalent. +static bool IsAutorelease(InstructionClass Class) { + return Class == IC_Autorelease || + Class == IC_AutoreleaseRV; +} + +/// IsForwarding - Test if the given class represents instructions which return +/// their argument verbatim. +static bool IsForwarding(InstructionClass Class) { + // objc_retainBlock technically doesn't always return its argument + // verbatim, but it doesn't matter for our purposes here. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_RetainBlock || + Class == IC_NoopCast; +} + +/// IsNoopOnNull - Test if the given class represents instructions which do +/// nothing if passed a null pointer. 
+static bool IsNoopOnNull(InstructionClass Class) { + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Release || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_RetainBlock; +} + +/// IsAlwaysTail - Test if the given class represents instructions which are +/// always safe to mark with the "tail" keyword. +static bool IsAlwaysTail(InstructionClass Class) { + // IC_RetainBlock may be given a stack argument. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV; +} + +/// IsNoThrow - Test if the given class represents instructions which are always +/// safe to mark with the nounwind attribute.. +static bool IsNoThrow(InstructionClass Class) { + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Release || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_AutoreleasepoolPush || + Class == IC_AutoreleasepoolPop; +} + +/// EraseInstruction - Erase the given instruction. ObjC calls return their +/// argument verbatim, so if it's such a call and the return value has users, +/// replace them with the argument value. +static void EraseInstruction(Instruction *CI) { + Value *OldArg = cast<CallInst>(CI)->getArgOperand(0); + + bool Unused = CI->use_empty(); + + if (!Unused) { + // Replace the return value with the argument. + assert(IsForwarding(GetBasicInstructionClass(CI)) && + "Can't delete non-forwarding instruction with users!"); + CI->replaceAllUsesWith(OldArg); + } + + CI->eraseFromParent(); + + if (Unused) + RecursivelyDeleteTriviallyDeadInstructions(OldArg); +} + +/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which +/// also knows how to look through objc_retain and objc_autorelease calls, which +/// we know to return their argument verbatim. 
+static const Value *GetUnderlyingObjCPtr(const Value *V) { + for (;;) { + V = GetUnderlyingObject(V); + if (!IsForwarding(GetBasicInstructionClass(V))) + break; + V = cast<CallInst>(V)->getArgOperand(0); + } + + return V; +} + +/// StripPointerCastsAndObjCCalls - This is a wrapper around +/// Value::stripPointerCasts which also knows how to look through objc_retain +/// and objc_autorelease calls, which we know to return their argument verbatim. +static const Value *StripPointerCastsAndObjCCalls(const Value *V) { + for (;;) { + V = V->stripPointerCasts(); + if (!IsForwarding(GetBasicInstructionClass(V))) + break; + V = cast<CallInst>(V)->getArgOperand(0); + } + return V; +} + +/// StripPointerCastsAndObjCCalls - This is a wrapper around +/// Value::stripPointerCasts which also knows how to look through objc_retain +/// and objc_autorelease calls, which we know to return their argument verbatim. +static Value *StripPointerCastsAndObjCCalls(Value *V) { + for (;;) { + V = V->stripPointerCasts(); + if (!IsForwarding(GetBasicInstructionClass(V))) + break; + V = cast<CallInst>(V)->getArgOperand(0); + } + return V; +} + +/// GetObjCArg - Assuming the given instruction is one of the special calls such +/// as objc_retain or objc_release, return the argument value, stripped of no-op +/// casts and forwarding calls. +static Value *GetObjCArg(Value *Inst) { + return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0)); +} + +/// IsObjCIdentifiedObject - This is similar to AliasAnalysis' +/// isObjCIdentifiedObject, except that it uses special knowledge of +/// ObjC conventions... +static bool IsObjCIdentifiedObject(const Value *V) { + // Assume that call results and arguments have their own "provenance". + // Constants (including GlobalVariables) and Allocas are never + // reference-counted. 
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) || + isa<Argument>(V) || isa<Constant>(V) || + isa<AllocaInst>(V)) + return true; + + if (const LoadInst *LI = dyn_cast<LoadInst>(V)) { + const Value *Pointer = + StripPointerCastsAndObjCCalls(LI->getPointerOperand()); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) { + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (GV->isConstant()) + return true; + StringRef Name = GV->getName(); + // These special variables are known to hold values which are not + // reference-counted pointers. + if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") || + Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") || + Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") || + Name.startswith("\01L_OBJC_METH_VAR_NAME_") || + Name.startswith("\01l_objc_msgSend_fixup_")) + return true; + } + } + + return false; +} + +/// FindSingleUseIdentifiedObject - This is similar to +/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value +/// with multiple uses. +static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { + if (Arg->hasOneUse()) { + if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg)) + return FindSingleUseIdentifiedObject(BC->getOperand(0)); + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg)) + if (GEP->hasAllZeroIndices()) + return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); + if (IsForwarding(GetBasicInstructionClass(Arg))) + return FindSingleUseIdentifiedObject( + cast<CallInst>(Arg)->getArgOperand(0)); + if (!IsObjCIdentifiedObject(Arg)) + return 0; + return Arg; + } + + // If we found an identifiable object but it has multiple uses, but they + // are trivial uses, we can still consider this to be a single-use + // value. 
+ if (IsObjCIdentifiedObject(Arg)) { + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ++UI) { + const User *U = *UI; + if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) + return 0; + } + + return Arg; + } + + return 0; +} + +/// ModuleHasARC - Test if the given module looks interesting to run ARC +/// optimization on. +static bool ModuleHasARC(const Module &M) { + return + M.getNamedValue("objc_retain") || + M.getNamedValue("objc_release") || + M.getNamedValue("objc_autorelease") || + M.getNamedValue("objc_retainAutoreleasedReturnValue") || + M.getNamedValue("objc_retainBlock") || + M.getNamedValue("objc_autoreleaseReturnValue") || + M.getNamedValue("objc_autoreleasePoolPush") || + M.getNamedValue("objc_loadWeakRetained") || + M.getNamedValue("objc_loadWeak") || + M.getNamedValue("objc_destroyWeak") || + M.getNamedValue("objc_storeWeak") || + M.getNamedValue("objc_initWeak") || + M.getNamedValue("objc_moveWeak") || + M.getNamedValue("objc_copyWeak") || + M.getNamedValue("objc_retainedObject") || + M.getNamedValue("objc_unretainedObject") || + M.getNamedValue("objc_unretainedPointer"); +} + +//===----------------------------------------------------------------------===// +// ARC AliasAnalysis. +//===----------------------------------------------------------------------===// + +#include "llvm/Pass.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" + +namespace { + /// ObjCARCAliasAnalysis - This is a simple alias analysis + /// implementation that uses knowledge of ARC constructs to answer queries. + /// + /// TODO: This class could be generalized to know about other ObjC-specific + /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing + /// even though their offsets are dynamic. 
+ class ObjCARCAliasAnalysis : public ImmutablePass, + public AliasAnalysis { + public: + static char ID; // Class identification, replacement for typeinfo + ObjCARCAliasAnalysis() : ImmutablePass(ID) { + initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + private: + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual AliasResult alias(const Location &LocA, const Location &LocB); + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + virtual ModRefBehavior getModRefBehavior(const Function *F); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2); + }; +} // End of anonymous namespace + +// Register this pass... 
+char ObjCARCAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true, false) + +ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { + return new ObjCARCAliasAnalysis(); +} + +void +ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +AliasAnalysis::AliasResult +ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { + if (!EnableARCOpts) + return AliasAnalysis::alias(LocA, LocB); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making a + // precise alias query. + const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); + const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); + AliasResult Result = + AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), + Location(SB, LocB.Size, LocB.TBAATag)); + if (Result != MayAlias) + return Result; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *UA = GetUnderlyingObjCPtr(SA); + const Value *UB = GetUnderlyingObjCPtr(SB); + if (UA != SA || UB != SB) { + Result = AliasAnalysis::alias(Location(UA), Location(UB)); + // We can't use MustAlias or PartialAlias results here because + // GetUnderlyingObjCPtr may return an offsetted pointer value. + if (Result == NoAlias) + return NoAlias; + } + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return MayAlias; +} + +bool +ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableARCOpts) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making + // a precise alias query. 
+ const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); + if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), + OrLocal)) + return true; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *U = GetUnderlyingObjCPtr(S); + if (U != S) + return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return false; +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + // We have nothing to do. Just chain to the next AliasAnalysis. + return AliasAnalysis::getModRefBehavior(CS); +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefBehavior(F); + + switch (GetFunctionClass(F)) { + case IC_NoopCast: + return DoesNotAccessMemory; + default: + break; + } + + return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefInfo(CS, Loc); + + switch (GetBasicInstructionClass(CS.getInstruction())) { + case IC_Retain: + case IC_RetainRV: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_NoopCast: + case IC_AutoreleasepoolPush: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + // These functions don't access any memory visible to the compiler. + // Note that this doesn't include objc_retainBlock, becuase it updates + // pointers when it copies block data. 
+ return NoModRef; + default: + break; + } + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // TODO: Theoretically we could check for dependencies between objc_* calls + // and OnlyAccessesArgumentPointees calls or other well-behaved calls. + return AliasAnalysis::getModRefInfo(CS1, CS2); +} + +//===----------------------------------------------------------------------===// +// ARC expansion. +//===----------------------------------------------------------------------===// + +#include "llvm/Support/InstIterator.h" +#include "llvm/Transforms/Scalar.h" + +namespace { + /// ObjCARCExpand - Early ARC transformations. + class ObjCARCExpand : public FunctionPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + /// Run - A flag indicating whether this optimization pass should run. + bool Run; + + public: + static char ID; + ObjCARCExpand() : FunctionPass(ID) { + initializeObjCARCExpandPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCExpand::ID = 0; +INITIALIZE_PASS(ObjCARCExpand, + "objc-arc-expand", "ObjC ARC expansion", false, false) + +Pass *llvm::createObjCARCExpandPass() { + return new ObjCARCExpand(); +} + +void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +bool ObjCARCExpand::doInitialization(Module &M) { + Run = ModuleHasARC(M); + return false; +} + +bool ObjCARCExpand::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. 
+ if (!Run) + return false; + + bool Changed = false; + + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + Instruction *Inst = &*I; + + switch (GetBasicInstructionClass(Inst)) { + case IC_Retain: + case IC_RetainRV: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + // These calls return their argument verbatim, as a low-level + // optimization. However, this makes high-level optimizations + // harder. Undo any uses of this optimization that the front-end + // emitted here. We'll redo them in a later pass. + Changed = true; + Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0)); + break; + default: + break; + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// ARC optimization. +//===----------------------------------------------------------------------===// + +// TODO: On code like this: +// +// objc_retain(%x) +// stuff_that_cannot_release() +// objc_autorelease(%x) +// stuff_that_cannot_release() +// objc_retain(%x) +// stuff_that_cannot_release() +// objc_autorelease(%x) +// +// The second retain and autorelease can be deleted. + +// TODO: It should be possible to delete +// objc_autoreleasePoolPush and objc_autoreleasePoolPop +// pairs if nothing is actually autoreleased between them. Also, autorelease +// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code +// after inlining) can be turned into plain release calls. + +// TODO: Critical-edge splitting. If the optimial insertion point is +// a critical edge, the current algorithm has to fail, because it doesn't +// know how to split edges. It should be possible to make the optimizer +// think in terms of edges, rather than blocks, and then split critical +// edges on demand. + +// TODO: OptimizeSequences could generalized to be Interprocedural. 
+ +// TODO: Recognize that a bunch of other objc runtime calls have +// non-escaping arguments and non-releasing arguments, and may be +// non-autoreleasing. + +// TODO: Sink autorelease calls as far as possible. Unfortunately we +// usually can't sink them past other calls, which would be the main +// case where it would be useful. + +// TODO: The pointer returned from objc_loadWeakRetained is retained. + +// TODO: Delete release+retain pairs (rare). + +#include "llvm/GlobalAlias.h" +#include "llvm/Constants.h" +#include "llvm/LLVMContext.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CFG.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" + +STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); +STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); +STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); +STATISTIC(NumRets, "Number of return value forwarding " + "retain+autoreleaes eliminated"); +STATISTIC(NumRRs, "Number of retain+release paths eliminated"); +STATISTIC(NumPeeps, "Number of calls peephole-optimized"); + +namespace { + /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it + /// uses many of the same techniques, except it uses special ObjC-specific + /// reasoning about pointer relationships. + class ProvenanceAnalysis { + AliasAnalysis *AA; + + typedef std::pair<const Value *, const Value *> ValuePairTy; + typedef DenseMap<ValuePairTy, bool> CachedResultsTy; + CachedResultsTy CachedResults; + + bool relatedCheck(const Value *A, const Value *B); + bool relatedSelect(const SelectInst *A, const Value *B); + bool relatedPHI(const PHINode *A, const Value *B); + + // Do not implement. 
+ void operator=(const ProvenanceAnalysis &); + ProvenanceAnalysis(const ProvenanceAnalysis &); + + public: + ProvenanceAnalysis() {} + + void setAA(AliasAnalysis *aa) { AA = aa; } + + AliasAnalysis *getAA() const { return AA; } + + bool related(const Value *A, const Value *B); + + void clear() { + CachedResults.clear(); + } + }; +} + +bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { + // If the values are Selects with the same condition, we can do a more precise + // check: just check for relations between the values on corresponding arms. + if (const SelectInst *SB = dyn_cast<SelectInst>(B)) + if (A->getCondition() == SB->getCondition()) { + if (related(A->getTrueValue(), SB->getTrueValue())) + return true; + if (related(A->getFalseValue(), SB->getFalseValue())) + return true; + return false; + } + + // Check both arms of the Select node individually. + if (related(A->getTrueValue(), B)) + return true; + if (related(A->getFalseValue(), B)) + return true; + + // The arms both checked out. + return false; +} + +bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { + // If the values are PHIs in the same block, we can do a more precise as well + // as efficient check: just check for relations between the values on + // corresponding edges. + if (const PHINode *PNB = dyn_cast<PHINode>(B)) + if (PNB->getParent() == A->getParent()) { + for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) + if (related(A->getIncomingValue(i), + PNB->getIncomingValueForBlock(A->getIncomingBlock(i)))) + return true; + return false; + } + + // Check each unique source of the PHI node against B. + SmallPtrSet<const Value *, 4> UniqueSrc; + for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) { + const Value *PV1 = A->getIncomingValue(i); + if (UniqueSrc.insert(PV1) && related(PV1, B)) + return true; + } + + // All of the arms checked out. 
+ return false; +} + +/// isStoredObjCPointer - Test if the value of P, or any value covered by its +/// provenance, is ever stored within the function (not counting callees). +static bool isStoredObjCPointer(const Value *P) { + SmallPtrSet<const Value *, 8> Visited; + SmallVector<const Value *, 8> Worklist; + Worklist.push_back(P); + Visited.insert(P); + do { + P = Worklist.pop_back_val(); + for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end(); + UI != UE; ++UI) { + const User *Ur = *UI; + if (isa<StoreInst>(Ur)) { + if (UI.getOperandNo() == 0) + // The pointer is stored. + return true; + // The pointed is stored through. + continue; + } + if (isa<CallInst>(Ur)) + // The pointer is passed as an argument, ignore this. + continue; + if (isa<PtrToIntInst>(P)) + // Assume the worst. + return true; + if (Visited.insert(Ur)) + Worklist.push_back(Ur); + } + } while (!Worklist.empty()); + + // Everything checked out. + return false; +} + +bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { + // Skip past provenance pass-throughs. + A = GetUnderlyingObjCPtr(A); + B = GetUnderlyingObjCPtr(B); + + // Quick check. + if (A == B) + return true; + + // Ask regular AliasAnalysis, for a first approximation. + switch (AA->alias(A, B)) { + case AliasAnalysis::NoAlias: + return false; + case AliasAnalysis::MustAlias: + case AliasAnalysis::PartialAlias: + return true; + case AliasAnalysis::MayAlias: + break; + } + + bool AIsIdentified = IsObjCIdentifiedObject(A); + bool BIsIdentified = IsObjCIdentifiedObject(B); + + // An ObjC-Identified object can't alias a load if it is never locally stored. + if (AIsIdentified) { + if (BIsIdentified) { + // If both pointers have provenance, they can be directly compared. + if (A != B) + return false; + } else { + if (isa<LoadInst>(B)) + return isStoredObjCPointer(A); + } + } else { + if (BIsIdentified && isa<LoadInst>(A)) + return isStoredObjCPointer(B); + } + + // Special handling for PHI and Select. 
+ if (const PHINode *PN = dyn_cast<PHINode>(A)) + return relatedPHI(PN, B); + if (const PHINode *PN = dyn_cast<PHINode>(B)) + return relatedPHI(PN, A); + if (const SelectInst *S = dyn_cast<SelectInst>(A)) + return relatedSelect(S, B); + if (const SelectInst *S = dyn_cast<SelectInst>(B)) + return relatedSelect(S, A); + + // Conservative. + return true; +} + +bool ProvenanceAnalysis::related(const Value *A, const Value *B) { + // Begin by inserting a conservative value into the map. If the insertion + // fails, we have the answer already. If it succeeds, leave it there until we + // compute the real answer to guard against recursive queries. + if (A > B) std::swap(A, B); + std::pair<CachedResultsTy::iterator, bool> Pair = + CachedResults.insert(std::make_pair(ValuePairTy(A, B), true)); + if (!Pair.second) + return Pair.first->second; + + bool Result = relatedCheck(A, B); + CachedResults[ValuePairTy(A, B)] = Result; + return Result; +} + +namespace { + // Sequence - A sequence of states that a pointer may go through in which an + // objc_retain and objc_release are actually needed. + enum Sequence { + S_None, + S_Retain, ///< objc_retain(x) + S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement + S_Use, ///< any use of x + S_Stop, ///< like S_Release, but code motion is stopped + S_Release, ///< objc_release(x) + S_MovableRelease ///< objc_release(x), !clang.imprecise_release + }; +} + +static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { + // The easy cases. + if (A == B) + return A; + if (A == S_None || B == S_None) + return S_None; + + if (A > B) std::swap(A, B); + if (TopDown) { + // Choose the side which is further along in the sequence. + if ((A == S_Retain || A == S_CanRelease) && + (B == S_CanRelease || B == S_Use)) + return B; + } else { + // Choose the side which is further along in the sequence. 
+ if ((A == S_Use || A == S_CanRelease) && + (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease)) + return A; + // If both sides are releases, choose the more conservative one. + if (A == S_Stop && (B == S_Release || B == S_MovableRelease)) + return A; + if (A == S_Release && B == S_MovableRelease) + return A; + } + + return S_None; +} + +namespace { + /// RRInfo - Unidirectional information about either a + /// retain-decrement-use-release sequence or release-use-decrement-retain + /// reverese sequence. + struct RRInfo { + /// KnownSafe - After an objc_retain, the reference count of the referenced + /// object is known to be positive. Similarly, before an objc_release, the + /// reference count of the referenced object is known to be positive. If + /// there are retain-release pairs in code regions where the retain count + /// is known to be positive, they can be eliminated, regardless of any side + /// effects between them. + /// + /// Also, a retain+release pair nested within another retain+release + /// pair all on the known same pointer value can be eliminated, regardless + /// of any intervening side effects. + /// + /// KnownSafe is true when either of these conditions is satisfied. + bool KnownSafe; + + /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as + /// opposed to objc_retain calls). + bool IsRetainBlock; + + /// IsTailCallRelease - True of the objc_release calls are all marked + /// with the "tail" keyword. + bool IsTailCallRelease; + + /// ReleaseMetadata - If the Calls are objc_release calls and they all have + /// a clang.imprecise_release tag, this is the metadata tag. + MDNode *ReleaseMetadata; + + /// Calls - For a top-down sequence, the set of objc_retains or + /// objc_retainBlocks. For bottom-up, the set of objc_releases. + SmallPtrSet<Instruction *, 2> Calls; + + /// ReverseInsertPts - The set of optimal insert positions for + /// moving calls in the opposite sequence. 
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts; + + RRInfo() : + KnownSafe(false), IsRetainBlock(false), IsTailCallRelease(false), + ReleaseMetadata(0) {} + + void clear(); + }; +} + +void RRInfo::clear() { + KnownSafe = false; + IsRetainBlock = false; + IsTailCallRelease = false; + ReleaseMetadata = 0; + Calls.clear(); + ReverseInsertPts.clear(); +} + +namespace { + /// PtrState - This class summarizes several per-pointer runtime properties + /// which are propogated through the flow graph. + class PtrState { + /// RefCount - The known minimum number of reference count increments. + unsigned RefCount; + + /// NestCount - The known minimum level of retain+release nesting. + unsigned NestCount; + + /// Seq - The current position in the sequence. + Sequence Seq; + + public: + /// RRI - Unidirectional information about the current sequence. + /// TODO: Encapsulate this better. + RRInfo RRI; + + PtrState() : RefCount(0), NestCount(0), Seq(S_None) {} + + void SetAtLeastOneRefCount() { + if (RefCount == 0) RefCount = 1; + } + + void IncrementRefCount() { + if (RefCount != UINT_MAX) ++RefCount; + } + + void DecrementRefCount() { + if (RefCount != 0) --RefCount; + } + + bool IsKnownIncremented() const { + return RefCount > 0; + } + + void IncrementNestCount() { + if (NestCount != UINT_MAX) ++NestCount; + } + + void DecrementNestCount() { + if (NestCount != 0) --NestCount; + } + + bool IsKnownNested() const { + return NestCount > 0; + } + + void SetSeq(Sequence NewSeq) { + Seq = NewSeq; + } + + void SetSeqToRelease(MDNode *M) { + if (Seq == S_None || Seq == S_Use) { + Seq = M ? 
S_MovableRelease : S_Release; + RRI.ReleaseMetadata = M; + } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) { + Seq = S_Release; + RRI.ReleaseMetadata = 0; + } + } + + Sequence GetSeq() const { + return Seq; + } + + void ClearSequenceProgress() { + Seq = S_None; + RRI.clear(); + } + + void Merge(const PtrState &Other, bool TopDown); + }; +} + +void +PtrState::Merge(const PtrState &Other, bool TopDown) { + Seq = MergeSeqs(Seq, Other.Seq, TopDown); + RefCount = std::min(RefCount, Other.RefCount); + NestCount = std::min(NestCount, Other.NestCount); + + // We can't merge a plain objc_retain with an objc_retainBlock. + if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) + Seq = S_None; + + if (Seq == S_None) { + RRI.clear(); + } else { + // Conservatively merge the ReleaseMetadata information. + if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) + RRI.ReleaseMetadata = 0; + + RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe; + RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; + RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); + RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(), + Other.RRI.ReverseInsertPts.end()); + } +} + +namespace { + /// BBState - Per-BasicBlock state. + class BBState { + /// TopDownPathCount - The number of unique control paths from the entry + /// which can reach this block. + unsigned TopDownPathCount; + + /// BottomUpPathCount - The number of unique control paths to exits + /// from this block. + unsigned BottomUpPathCount; + + /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp. + typedef MapVector<const Value *, PtrState> MapTy; + + /// PerPtrTopDown - The top-down traversal uses this to record information + /// known about a pointer at the bottom of each block. + MapTy PerPtrTopDown; + + /// PerPtrBottomUp - The bottom-up traversal uses this to record information + /// known about a pointer at the top of each block. 
+ MapTy PerPtrBottomUp; + + public: + BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} + + typedef MapTy::iterator ptr_iterator; + typedef MapTy::const_iterator ptr_const_iterator; + + ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } + ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } + ptr_const_iterator top_down_ptr_begin() const { + return PerPtrTopDown.begin(); + } + ptr_const_iterator top_down_ptr_end() const { + return PerPtrTopDown.end(); + } + + ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } + ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } + ptr_const_iterator bottom_up_ptr_begin() const { + return PerPtrBottomUp.begin(); + } + ptr_const_iterator bottom_up_ptr_end() const { + return PerPtrBottomUp.end(); + } + + /// SetAsEntry - Mark this block as being an entry block, which has one + /// path from the entry by definition. + void SetAsEntry() { TopDownPathCount = 1; } + + /// SetAsExit - Mark this block as being an exit block, which has one + /// path to an exit by definition. + void SetAsExit() { BottomUpPathCount = 1; } + + PtrState &getPtrTopDownState(const Value *Arg) { + return PerPtrTopDown[Arg]; + } + + PtrState &getPtrBottomUpState(const Value *Arg) { + return PerPtrBottomUp[Arg]; + } + + void clearBottomUpPointers() { + PerPtrBottomUp.clear(); + } + + void clearTopDownPointers() { + PerPtrTopDown.clear(); + } + + void InitFromPred(const BBState &Other); + void InitFromSucc(const BBState &Other); + void MergePred(const BBState &Other); + void MergeSucc(const BBState &Other); + + /// GetAllPathCount - Return the number of possible unique paths from an + /// entry to an exit which pass through this block. This is only valid + /// after both the top-down and bottom-up traversals are complete. 
+ unsigned GetAllPathCount() const { + return TopDownPathCount * BottomUpPathCount; + } + + /// IsVisitedTopDown - Test whether the block for this BBState has been + /// visited by the top-down portion of the algorithm. + bool isVisitedTopDown() const { + return TopDownPathCount != 0; + } + }; +} + +void BBState::InitFromPred(const BBState &Other) { + PerPtrTopDown = Other.PerPtrTopDown; + TopDownPathCount = Other.TopDownPathCount; +} + +void BBState::InitFromSucc(const BBState &Other) { + PerPtrBottomUp = Other.PerPtrBottomUp; + BottomUpPathCount = Other.BottomUpPathCount; +} + +/// MergePred - The top-down traversal uses this to merge information about +/// predecessors to form the initial state for a new block. +void BBState::MergePred(const BBState &Other) { + // Other.TopDownPathCount can be 0, in which case it is either dead or a + // loop backedge. Loop backedges are special. + TopDownPathCount += Other.TopDownPathCount; + + // For each entry in the other set, if our set has an entry with the same key, + // merge the entries. Otherwise, copy the entry and merge it with an empty + // entry. + for (ptr_const_iterator MI = Other.top_down_ptr_begin(), + ME = Other.top_down_ptr_end(); MI != ME; ++MI) { + std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI); + Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, + /*TopDown=*/true); + } + + // For each entry in our set, if the other set doesn't have an entry with the + // same key, force it to merge with an empty entry. + for (ptr_iterator MI = top_down_ptr_begin(), + ME = top_down_ptr_end(); MI != ME; ++MI) + if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end()) + MI->second.Merge(PtrState(), /*TopDown=*/true); +} + +/// MergeSucc - The bottom-up traversal uses this to merge information about +/// successors to form the initial state for a new block. 
+void BBState::MergeSucc(const BBState &Other) { + // Other.BottomUpPathCount can be 0, in which case it is either dead or a + // loop backedge. Loop backedges are special. + BottomUpPathCount += Other.BottomUpPathCount; + + // For each entry in the other set, if our set has an entry with the + // same key, merge the entries. Otherwise, copy the entry and merge + // it with an empty entry. + for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(), + ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) { + std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI); + Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, + /*TopDown=*/false); + } + + // For each entry in our set, if the other set doesn't have an entry + // with the same key, force it to merge with an empty entry. + for (ptr_iterator MI = bottom_up_ptr_begin(), + ME = bottom_up_ptr_end(); MI != ME; ++MI) + if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end()) + MI->second.Merge(PtrState(), /*TopDown=*/false); +} + +namespace { + /// ObjCARCOpt - The main ARC optimization pass. + class ObjCARCOpt : public FunctionPass { + bool Changed; + ProvenanceAnalysis PA; + + /// Run - A flag indicating whether this optimization pass should run. + bool Run; + + /// RetainRVCallee, etc. - Declarations for ObjC runtime + /// functions, for use in creating calls to them. These are initialized + /// lazily to avoid cluttering up the Module with unused declarations. + Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee, + *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee; + + /// UsedInThisFunciton - Flags which determine whether each of the + /// interesting runtine functions is in fact used in the current function. + unsigned UsedInThisFunction; + + /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release + /// metadata. 
+ unsigned ImpreciseReleaseMDKind; + + Constant *getRetainRVCallee(Module *M); + Constant *getAutoreleaseRVCallee(Module *M); + Constant *getReleaseCallee(Module *M); + Constant *getRetainCallee(Module *M); + Constant *getRetainBlockCallee(Module *M); + Constant *getAutoreleaseCallee(Module *M); + + void OptimizeRetainCall(Function &F, Instruction *Retain); + bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); + void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV); + void OptimizeIndividualCalls(Function &F); + + void CheckForCFGHazards(const BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + BBState &MyStates) const; + bool VisitBottomUp(BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains); + bool VisitTopDown(BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + DenseMap<Value *, RRInfo> &Releases); + bool Visit(Function &F, + DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases); + + void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, + SmallVectorImpl<Instruction *> &DeadInsts, + Module *M); + + bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, + Module *M); + + void OptimizeWeakCalls(Function &F); + + bool OptimizeSequences(Function &F); + + void OptimizeReturns(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + virtual void releaseMemory(); + + public: + static char ID; + ObjCARCOpt() : FunctionPass(ID) { + initializeObjCARCOptPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCOpt::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCOpt, + "objc-arc", "ObjC ARC 
optimization", false, false) +INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) +INITIALIZE_PASS_END(ObjCARCOpt, + "objc-arc", "ObjC ARC optimization", false, false) + +Pass *llvm::createObjCARCOptPass() { + return new ObjCARCOpt(); +} + +void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<ObjCARCAliasAnalysis>(); + AU.addRequired<AliasAnalysis>(); + // ARC optimization doesn't currently split critical edges. + AU.setPreservesCFG(); +} + +Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { + if (!RetainRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainRVCallee = + M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, + Attributes); + } + return RetainRVCallee; +} + +Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { + if (!AutoreleaseRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + AutoreleaseRVCallee = + M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, + Attributes); + } + return AutoreleaseRVCallee; +} + +Constant *ObjCARCOpt::getReleaseCallee(Module *M) { + if (!ReleaseCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + ReleaseCallee = + M->getOrInsertFunction( + "objc_release", + FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), + Attributes); + } + return ReleaseCallee; +} + +Constant 
*ObjCARCOpt::getRetainCallee(Module *M) { + if (!RetainCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainCallee = + M->getOrInsertFunction( + "objc_retain", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + Attributes); + } + return RetainCallee; +} + +Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { + if (!RetainBlockCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. + RetainBlockCallee = + M->getOrInsertFunction( + "objc_retainBlock", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + Attributes); + } + return RetainBlockCallee; +} + +Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { + if (!AutoreleaseCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + AutoreleaseCallee = + M->getOrInsertFunction( + "objc_autorelease", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + Attributes); + } + return AutoreleaseCallee; +} + +/// CanAlterRefCount - Test whether the given instruction can result in a +/// reference count modification (positive or negative) for the pointer's +/// object. +static bool +CanAlterRefCount(const Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, InstructionClass Class) { + switch (Class) { + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_User: + // These operations never directly modify a reference count. 
+ return false; + default: break; + } + + ImmutableCallSite CS = static_cast<const Value *>(Inst); + assert(CS && "Only calls can alter reference counts!"); + + // See if AliasAnalysis can help us with the call. + AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); + if (AliasAnalysis::onlyReadsMemory(MRB)) + return false; + if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + const Value *Op = *I; + if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + return true; + } + return false; + } + + // Assume the worst. + return true; +} + +/// CanUse - Test whether the given instruction can "use" the given pointer's +/// object in a way that requires the reference count to be positive. +static bool +CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, + InstructionClass Class) { + // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. + if (Class == IC_Call) + return false; + + // Consider various instructions which may have pointer arguments which are + // not "uses". + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) { + // Comparing a pointer with null, or any other constant, isn't really a use, + // because we don't care what the pointer points to, or about the values + // of any other dynamic reference-counted pointers. + if (!IsPotentialUse(ICI->getOperand(1))) + return false; + } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { + // For calls, just check the arguments (and not the callee operand). + for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), + OE = CS.arg_end(); OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + return true; + } + return false; + } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // Special-case stores, because we don't care about the stored value, just + // the store address. 
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); + // If we can't tell what the underlying object was, assume there is a + // dependence. + return IsPotentialUse(Op) && PA.related(Op, Ptr); + } + + // Check each operand for a match. + for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); + OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + return true; + } + return false; +} + +/// CanInterruptRV - Test whether the given instruction can autorelease +/// any pointer or cause an autoreleasepool pop. +static bool +CanInterruptRV(InstructionClass Class) { + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_CallOrUser: + case IC_Call: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + return true; + default: + return false; + } +} + +namespace { + /// DependenceKind - There are several kinds of dependence-like concepts in + /// use here. + enum DependenceKind { + NeedsPositiveRetainCount, + CanChangeRetainCount, + RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. + RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. + RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. + }; +} + +/// Depends - Test if there can be dependencies on Inst through Arg. This +/// function only tests dependencies relevant for removing pairs of calls. +static bool +Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, + ProvenanceAnalysis &PA) { + // If we've reached the definition of Arg, stop. 
+ if (Inst == Arg) + return true; + + switch (Flavor) { + case NeedsPositiveRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanUse(Inst, Arg, PA, Class); + } + } + + case CanChangeRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + // Conservatively assume this can decrement any count. + return true; + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanAlterRefCount(Inst, Arg, PA, Class); + } + } + + case RetainAutoreleaseDep: + switch (GetBasicInstructionClass(Inst)) { + case IC_AutoreleasepoolPop: + // Don't merge an objc_autorelease with an objc_retain inside a different + // autoreleasepool scope. + return true; + case IC_Retain: + case IC_RetainRV: + // Check for a retain of the same pointer for merging. + return GetObjCArg(Inst) == Arg; + default: + // Nothing else matters for objc_retainAutorelease formation. + return false; + } + break; + + case RetainAutoreleaseRVDep: { + InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_RetainRV: + // Check for a retain of the same pointer for merging. + return GetObjCArg(Inst) == Arg; + default: + // Anything that can autorelease interrupts + // retainAutoreleaseReturnValue formation. + return CanInterruptRV(Class); + } + break; + } + + case RetainRVDep: + return CanInterruptRV(GetBasicInstructionClass(Inst)); + } + + llvm_unreachable("Invalid dependence flavor"); + return true; +} + +/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and +/// find local and non-local dependencies on Arg. +/// TODO: Cache results? 
+static void
+FindDependencies(DependenceKind Flavor,
+                 const Value *Arg,
+                 BasicBlock *StartBB, Instruction *StartInst,
+                 SmallPtrSet<Instruction *, 4> &DependingInstructions,
+                 SmallPtrSet<const BasicBlock *, 4> &Visited,
+                 ProvenanceAnalysis &PA) {
+  // Results are reported through DependingInstructions. Besides real
+  // instructions it may receive two markers: a null pointer when the walk
+  // reaches a block with no predecessors, and the all-ones pointer when the
+  // post-dominance check at the end fails.
+  BasicBlock::iterator StartPos = StartInst;
+
+  SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+  Worklist.push_back(std::make_pair(StartBB, StartPos));
+  do {
+    std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+      Worklist.pop_back_val();
+    BasicBlock *LocalStartBB = Pair.first;
+    BasicBlock::iterator LocalStartPos = Pair.second;
+    BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+    // Scan backwards through this block until a dependence is found or the
+    // top of the block is reached.
+    for (;;) {
+      if (LocalStartPos == StartBBBegin) {
+        pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+        if (PI == PE)
+          // If we've reached the function entry, produce a null dependence.
+          DependingInstructions.insert(0);
+        else
+          // Add the predecessors to the worklist.
+          do {
+            BasicBlock *PredBB = *PI;
+            if (Visited.insert(PredBB))
+              Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+          } while (++PI != PE);
+        break;
+      }
+
+      Instruction *Inst = --LocalStartPos;
+      if (Depends(Flavor, Inst, Arg, PA)) {
+        DependingInstructions.insert(Inst);
+        break;
+      }
+    }
+  } while (!Worklist.empty());
+
+  // Determine whether the original StartBB post-dominates all of the blocks we
+  // visited. If not, insert a sentinel indicating that most optimizations are
+  // not safe.
+  for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+       E = Visited.end(); I != E; ++I) {
+    const BasicBlock *BB = *I;
+    if (BB == StartBB)
+      continue;
+    const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+    for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+      const BasicBlock *Succ = *SI;
+      // A visited block with a successor that is neither StartBB nor visited
+      // has a path that leaves the region walked above.
+      if (Succ != StartBB && !Visited.count(Succ)) {
+        DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
+        return;
+      }
+    }
+  }
+}
+
+/// isNullOrUndef - Test whether V is a null pointer constant or undef.
+static bool isNullOrUndef(const Value *V) {
+  return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+/// isNoopInstruction - Test whether I is a bitcast or a getelementptr whose
+/// indices are all zero.
+static bool isNoopInstruction(const Instruction *I) {
+  return isa<BitCastInst>(I) ||
+         (isa<GetElementPtrInst>(I) &&
+          cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+/// OptimizeRetainCall - Turn objc_retain into
+/// objc_retainAutoreleasedReturnValue if the operand is a return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+  CallSite CS(GetObjCArg(Retain));
+  Instruction *Call = CS.getInstruction();
+  // Only applies when the retained value is produced by a call site in the
+  // same block as the retain.
+  if (!Call) return;
+  if (Call->getParent() != Retain->getParent()) return;
+
+  // Check that the call is next to the retain.
+  BasicBlock::iterator I = Call;
+  ++I;
+  while (isNoopInstruction(I)) ++I;
+  if (&*I != Retain)
+    return;
+
+  // Turn it to an objc_retainAutoreleasedReturnValue.
+  Changed = true;
+  ++NumPeeps;
+  cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+}
+
+/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
+/// objc_retain if the operand is not a return value.  Or, if it can be
+/// paired with an objc_autoreleaseReturnValue, delete the pair and
+/// return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+  // Check for the argument being from an immediately preceding call.
+  Value *Arg = GetObjCArg(RetainRV);
+  CallSite CS(Arg);
+  if (Instruction *Call = CS.getInstruction())
+    if (Call->getParent() == RetainRV->getParent()) {
+      BasicBlock::iterator I = Call;
+      ++I;
+      while (isNoopInstruction(I)) ++I;
+      // The RetainRV directly follows the call (ignoring no-op casts), so it
+      // is left unchanged.
+      if (&*I == RetainRV)
+        return false;
+    }
+
+  // Check for being preceded by an objc_autoreleaseReturnValue on the same
+  // pointer. In this case, we can delete the pair.
+  BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+  if (I != Begin) {
+    // Step back to the nearest preceding instruction that is not a no-op
+    // cast, stopping at the start of the block.
+    do --I; while (I != Begin && isNoopInstruction(I));
+    if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+        GetObjCArg(I) == Arg) {
+      Changed = true;
+      ++NumPeeps;
+      EraseInstruction(I);
+      EraseInstruction(RetainRV);
+      return true;
+    }
+  }
+
+  // Turn it to a plain objc_retain.
+  Changed = true;
+  ++NumPeeps;
+  cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+  return false;
+}
+
+/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into
+/// objc_autorelease if the result is not used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
+  // Check for a return of the pointer value.
+  const Value *Ptr = GetObjCArg(AutoreleaseRV);
+  SmallVector<const Value *, 2> Users;
+  Users.push_back(Ptr);
+  do {
+    Ptr = Users.pop_back_val();
+    for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+         UI != UE; ++UI) {
+      const User *I = *UI;
+      // A return of the value, or an IC_RetainRV call using it, means the
+      // call is left as it is.
+      if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+        return;
+      // Look through bitcasts of the pointer as well.
+      if (isa<BitCastInst>(I))
+        Users.push_back(I);
+    }
+  } while (!Users.empty());
+
+  Changed = true;
+  ++NumPeeps;
+  cast<CallInst>(AutoreleaseRV)->
+    setCalledFunction(getAutoreleaseCallee(F.getParent()));
+}
+
+/// OptimizeIndividualCalls - Visit each call, one at a time, and make
+/// simplifications without doing any additional analysis.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { + // Reset all the flags in preparation for recomputing them. + UsedInThisFunction = 0; + + // Visit all objc_* calls in F. + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + InstructionClass Class = GetBasicInstructionClass(Inst); + + switch (Class) { + default: break; + + // Delete no-op casts. These function calls have special semantics, but + // the semantics are entirely implemented via lowering in the front-end, + // so by the time they reach the optimizer, they are just no-op calls + // which return their argument. + // + // There are gray areas here, as the ability to cast reference-counted + // pointers to raw void* and back allows code to break ARC assumptions, + // however these are currently considered to be unimportant. + case IC_NoopCast: + Changed = true; + ++NumNoops; + EraseInstruction(Inst); + continue; + + // If the pointer-to-weak-pointer is null, it's undefined behavior. 
+ case IC_StoreWeak: + case IC_LoadWeak: + case IC_LoadWeakRetained: + case IC_InitWeak: + case IC_DestroyWeak: { + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(0))) { + // The call is replaced by a store of undef to null below; record that + // the function is being modified, as the other rewrites in this loop do. + Changed = true; + Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_CopyWeak: + case IC_MoveWeak: { + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(0)) || + isNullOrUndef(CI->getArgOperand(1))) { + // Same rewrite as above when either weak-pointer operand is null or + // undef; record the modification. + Changed = true; + Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_Retain: + OptimizeRetainCall(F, Inst); + break; + case IC_RetainRV: + if (OptimizeRetainRVCall(F, Inst)) + continue; + break; + case IC_AutoreleaseRV: + OptimizeAutoreleaseRVCall(F, Inst); + break; + } + + // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. + if (IsAutorelease(Class) && Inst->use_empty()) { + CallInst *Call = cast<CallInst>(Inst); + const Value *Arg = Call->getArgOperand(0); + Arg = FindSingleUseIdentifiedObject(Arg); + if (Arg) { + Changed = true; + ++NumAutoreleases; + + // Create the declaration lazily. + LLVMContext &C = Inst->getContext(); + CallInst *NewCall = + CallInst::Create(getReleaseCallee(F.getParent()), + Call->getArgOperand(0), "", Call); + NewCall->setMetadata(ImpreciseReleaseMDKind, + MDNode::get(C, ArrayRef<Value *>())); + EraseInstruction(Call); + Inst = NewCall; + Class = IC_Release; + } + } + + // For functions which can never be passed stack arguments, add + // a tail keyword. + if (IsAlwaysTail(Class)) { + Changed = true; + cast<CallInst>(Inst)->setTailCall(); + } + + // Set nounwind as needed. 
+ if (IsNoThrow(Class)) { + Changed = true; + cast<CallInst>(Inst)->setDoesNotThrow(); + } + + if (!IsNoopOnNull(Class)) { + UsedInThisFunction |= 1 << Class; + continue; + } + + const Value *Arg = GetObjCArg(Inst); + + // ARC calls with null are no-ops. Delete them. + if (isNullOrUndef(Arg)) { + Changed = true; + ++NumNoops; + EraseInstruction(Inst); + continue; + } + + // Keep track of which of retain, release, autorelease, and retain_block + // are actually present in this function. + UsedInThisFunction |= 1 << Class; + + // If Arg is a PHI, and one or more incoming values to the + // PHI are null, and the call is control-equivalent to the PHI, and there + // are no relevant side effects between the PHI and the call, the call + // could be pushed up to just those paths with non-null incoming values. + // For now, don't bother splitting critical edges for this. + SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; + Worklist.push_back(std::make_pair(Inst, Arg)); + do { + std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); + Inst = Pair.first; + Arg = Pair.second; + + const PHINode *PN = dyn_cast<PHINode>(Arg); + if (!PN) continue; + + // Determine if the PHI has any null operands, or any incoming + // critical edges. + bool HasNull = false; + bool HasCriticalEdges = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (isNullOrUndef(Incoming)) + HasNull = true; + else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) + .getNumSuccessors() != 1) { + HasCriticalEdges = true; + break; + } + } + // If we have null operands and no critical edges, optimize. + if (!HasCriticalEdges && HasNull) { + SmallPtrSet<Instruction *, 4> DependingInstructions; + SmallPtrSet<const BasicBlock *, 4> Visited; + + // Check that there is nothing that cares about the reference + // count between the call and the phi. 
+ FindDependencies(NeedsPositiveRetainCount, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + if (DependingInstructions.size() == 1 && + *DependingInstructions.begin() == PN) { + Changed = true; + ++NumPartialNoops; + // Clone the call into each predecessor that has a non-null value. + CallInst *CInst = cast<CallInst>(Inst); + Type *ParamTy = CInst->getArgOperand(0)->getType(); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (!isNullOrUndef(Incoming)) { + CallInst *Clone = cast<CallInst>(CInst->clone()); + Value *Op = PN->getIncomingValue(i); + Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); + if (Op->getType() != ParamTy) + Op = new BitCastInst(Op, ParamTy, "", InsertPos); + Clone->setArgOperand(0, Op); + Clone->insertBefore(InsertPos); + Worklist.push_back(std::make_pair(Clone, Incoming)); + } + } + // Erase the original call. + EraseInstruction(CInst); + continue; + } + } + } while (!Worklist.empty()); + } +} + +/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible +/// control flow, or other CFG structures where moving code across the edge +/// would result in it being executed more. +void +ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + BBState &MyStates) const { + // If any top-down local-use or possible-dec has a succ which is earlier in + // the sequence, forget it. 
+ for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(), + E = MyStates.top_down_ptr_end(); I != E; ++I) + switch (I->second.GetSeq()) { + default: break; + case S_Use: { + const Value *Arg = I->first; + const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + bool SomeSuccHasSame = false; + bool AllSuccsHaveSame = true; + PtrState &S = MyStates.getPtrTopDownState(Arg); + for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { + PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg); + switch (SuccS.GetSeq()) { + case S_None: + case S_CanRelease: { + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + S.ClearSequenceProgress(); + continue; + } + case S_Use: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + // If the state at the other end of any of the successor edges + // matches the current state, require all edges to match. This + // guards against loops in the middle of a sequence. 
+ if (SomeSuccHasSame && !AllSuccsHaveSame) + S.ClearSequenceProgress(); + // Done with S_Use; do not fall through into the S_CanRelease checks. + break; + } + case S_CanRelease: { + const Value *Arg = I->first; + const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + bool SomeSuccHasSame = false; + bool AllSuccsHaveSame = true; + PtrState &S = MyStates.getPtrTopDownState(Arg); + for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { + PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg); + switch (SuccS.GetSeq()) { + case S_None: { + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + S.ClearSequenceProgress(); + continue; + } + case S_CanRelease: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + // If the state at the other end of any of the successor edges + // matches the current state, require all edges to match. This + // guards against loops in the middle of a sequence. + if (SomeSuccHasSame && !AllSuccsHaveSame) + S.ClearSequenceProgress(); + break; + } + } +} + +bool +ObjCARCOpt::VisitBottomUp(BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains) { + bool NestingDetected = false; + BBState &MyStates = BBStates[BB]; + + // Merge the states from each successor to compute the initial state + // for the current block. + const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + succ_const_iterator SI(TI), SE(TI, false); + if (SI == SE) + MyStates.SetAsExit(); + else + do { + const BasicBlock *Succ = *SI++; + if (Succ == BB) + continue; + DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); + // If we haven't seen this node yet, then we've found a CFG cycle. + // Be optimistic here; it's CheckForCFGHazards' job detect trouble. 
+ if (I == BBStates.end()) + continue; + MyStates.InitFromSucc(I->second); + while (SI != SE) { + Succ = *SI++; + if (Succ != BB) { + I = BBStates.find(Succ); + if (I != BBStates.end()) + MyStates.MergeSucc(I->second); + } + } + break; + } while (SI != SE); + + // Visit all the instructions, bottom-up. + for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { + Instruction *Inst = llvm::prior(I); + InstructionClass Class = GetInstructionClass(Inst); + const Value *Arg = 0; + + switch (Class) { + case IC_Release: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrBottomUpState(Arg); + + // If we see two releases in a row on the same pointer. If so, make + // a note, and we'll cicle back to revisit it after we've + // hopefully eliminated the second release, which may allow us to + // eliminate the first release too. + // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. + if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) + NestingDetected = true; + + S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind)); + S.RRI.clear(); + S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); + S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); + S.RRI.Calls.insert(Inst); + + S.IncrementRefCount(); + S.IncrementNestCount(); + break; + } + case IC_RetainBlock: + case IC_Retain: + case IC_RetainRV: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrBottomUpState(Arg); + S.DecrementRefCount(); + S.SetAtLeastOneRefCount(); + S.DecrementNestCount(); + + // An objc_retainBlock call with just a use still needs to be kept, + // because it may be copying a block from the stack to the heap. 
+ if (Class == IC_RetainBlock && S.GetSeq() == S_Use) + S.SetSeq(S_CanRelease); + + switch (S.GetSeq()) { + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + S.RRI.ReverseInsertPts.clear(); + // FALL THROUGH + case S_CanRelease: + // Don't do retain+release tracking for IC_RetainRV, because it's + // better to let it remain as the first instruction after a call. + if (Class != IC_RetainRV) { + S.RRI.IsRetainBlock = Class == IC_RetainBlock; + Retains[Inst] = S.RRI; + } + S.ClearSequenceProgress(); + break; + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + continue; + } + case IC_AutoreleasepoolPop: + // Conservatively, clear MyStates for all known pointers. + MyStates.clearBottomUpPointers(); + continue; + case IC_AutoreleasepoolPush: + case IC_None: + // These are irrelevant. + continue; + default: + break; + } + + // Consider any other possible effects of this instruction on each + // pointer being tracked. + for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), + ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { + const Value *Ptr = MI->first; + if (Ptr == Arg) + continue; // Handled above. + PtrState &S = MI->second; + Sequence Seq = S.GetSeq(); + + // Check for possible releases. + if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + S.DecrementRefCount(); + switch (Seq) { + case S_Use: + S.SetSeq(S_CanRelease); + continue; + case S_CanRelease: + case S_Release: + case S_MovableRelease: + case S_Stop: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + + // Check for possible direct uses. 
+ switch (Seq) { + case S_Release: + case S_MovableRelease: + if (CanUse(Inst, Ptr, PA, Class)) { + assert(S.RRI.ReverseInsertPts.empty()); + S.RRI.ReverseInsertPts.insert(Inst); + S.SetSeq(S_Use); + } else if (Seq == S_Release && + (Class == IC_User || Class == IC_CallOrUser)) { + // Non-movable releases depend on any possible objc pointer use. + S.SetSeq(S_Stop); + assert(S.RRI.ReverseInsertPts.empty()); + S.RRI.ReverseInsertPts.insert(Inst); + } + break; + case S_Stop: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_CanRelease: + case S_Use: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + } + + return NestingDetected; +} + +bool +ObjCARCOpt::VisitTopDown(BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + DenseMap<Value *, RRInfo> &Releases) { + bool NestingDetected = false; + BBState &MyStates = BBStates[BB]; + + // Merge the states from each predecessor to compute the initial state + // for the current block. + const_pred_iterator PI(BB), PE(BB, false); + if (PI == PE) + MyStates.SetAsEntry(); + else + do { + const BasicBlock *Pred = *PI++; + if (Pred == BB) + continue; + DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); + assert(I != BBStates.end()); + // If we haven't seen this node yet, then we've found a CFG cycle. + // Be optimistic here; it's CheckForCFGHazards' job detect trouble. + if (!I->second.isVisitedTopDown()) + continue; + MyStates.InitFromPred(I->second); + while (PI != PE) { + Pred = *PI++; + if (Pred != BB) { + I = BBStates.find(Pred); + assert(I != BBStates.end()); + if (I->second.isVisitedTopDown()) + MyStates.MergePred(I->second); + } + } + break; + } while (PI != PE); + + // Visit all the instructions, top-down. 
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + Instruction *Inst = I; + InstructionClass Class = GetInstructionClass(Inst); + const Value *Arg = 0; + + switch (Class) { + case IC_RetainBlock: + case IC_Retain: + case IC_RetainRV: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrTopDownState(Arg); + + // Don't do retain+release tracking for IC_RetainRV, because it's + // better to let it remain as the first instruction after a call. + if (Class != IC_RetainRV) { + // If we see two retains in a row on the same pointer. If so, make + // a note, and we'll cicle back to revisit it after we've + // hopefully eliminated the second retain, which may allow us to + // eliminate the first retain too. + // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. + if (S.GetSeq() == S_Retain) + NestingDetected = true; + + S.SetSeq(S_Retain); + S.RRI.clear(); + S.RRI.IsRetainBlock = Class == IC_RetainBlock; + // Don't check S.IsKnownIncremented() here because it's not + // sufficient. 
+ S.RRI.KnownSafe = S.IsKnownNested(); + S.RRI.Calls.insert(Inst); + } + + S.SetAtLeastOneRefCount(); + S.IncrementRefCount(); + S.IncrementNestCount(); + continue; + } + case IC_Release: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrTopDownState(Arg); + S.DecrementRefCount(); + S.DecrementNestCount(); + + switch (S.GetSeq()) { + case S_Retain: + case S_CanRelease: + S.RRI.ReverseInsertPts.clear(); + // FALL THROUGH + case S_Use: + S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); + Releases[Inst] = S.RRI; + S.ClearSequenceProgress(); + break; + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + break; + } + case IC_AutoreleasepoolPop: + // Conservatively, clear MyStates for all known pointers. + MyStates.clearTopDownPointers(); + continue; + case IC_AutoreleasepoolPush: + case IC_None: + // These are irrelevant. + continue; + default: + break; + } + + // Consider any other possible effects of this instruction on each + // pointer being tracked. + for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), + ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { + const Value *Ptr = MI->first; + if (Ptr == Arg) + continue; // Handled above. + PtrState &S = MI->second; + Sequence Seq = S.GetSeq(); + + // Check for possible releases. + if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + S.DecrementRefCount(); + switch (Seq) { + case S_Retain: + S.SetSeq(S_CanRelease); + assert(S.RRI.ReverseInsertPts.empty()); + S.RRI.ReverseInsertPts.insert(Inst); + + // One call can't cause a transition from S_Retain to S_CanRelease + // and S_CanRelease to S_Use. If we've made the first transition, + // we're done. 
+ continue; + case S_Use: + case S_CanRelease: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + } + + // Check for possible direct uses. + switch (Seq) { + case S_CanRelease: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_Retain: + // An objc_retainBlock call may be responsible for copying the block + // data from the stack to the heap. Model this by moving it straight + // from S_Retain to S_Use. + if (S.RRI.IsRetainBlock && + CanUse(Inst, Ptr, PA, Class)) { + assert(S.RRI.ReverseInsertPts.empty()); + S.RRI.ReverseInsertPts.insert(Inst); + S.SetSeq(S_Use); + } + break; + case S_Use: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + } + } + + CheckForCFGHazards(BB, BBStates, MyStates); + return NestingDetected; +} + +// Visit - Visit the function both top-down and bottom-up. +bool +ObjCARCOpt::Visit(Function &F, + DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases) { + // Use reverse-postorder on the reverse CFG for bottom-up, because we + // magically know that loops will be well behaved, i.e. they won't repeatedly + // call retain on a single pointer without doing a release. We can't use + // ReversePostOrderTraversal here because we want to walk up from each + // function exit point. 
+ SmallPtrSet<BasicBlock *, 16> Visited; + SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack; + SmallVector<BasicBlock *, 16> Order; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BasicBlock *BB = I; + if (BB->getTerminator()->getNumSuccessors() == 0) + Stack.push_back(std::make_pair(BB, pred_begin(BB))); + } + while (!Stack.empty()) { + pred_iterator End = pred_end(Stack.back().first); + while (Stack.back().second != End) { + BasicBlock *BB = *Stack.back().second++; + if (Visited.insert(BB)) + Stack.push_back(std::make_pair(BB, pred_begin(BB))); + } + Order.push_back(Stack.pop_back_val().first); + } + bool BottomUpNestingDetected = false; + for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = + Order.rbegin(), E = Order.rend(); I != E; ++I) { + BasicBlock *BB = *I; + BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); + } + + // Use regular reverse-postorder for top-down. + bool TopDownNestingDetected = false; + typedef ReversePostOrderTraversal<Function *> RPOTType; + RPOTType RPOT(&F); + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + BasicBlock *BB = *I; + TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases); + } + + return TopDownNestingDetected && BottomUpNestingDetected; +} + +/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove. +void ObjCARCOpt::MoveCalls(Value *Arg, + RRInfo &RetainsToMove, + RRInfo &ReleasesToMove, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, + SmallVectorImpl<Instruction *> &DeadInsts, + Module *M) { + Type *ArgTy = Arg->getType(); + Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); + + // Insert the new retain and release calls. + for (SmallPtrSet<Instruction *, 2>::const_iterator + PI = ReleasesToMove.ReverseInsertPts.begin(), + PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) { + Instruction *InsertPt = *PI; + Value *MyArg = ArgTy == ParamTy ? 
Arg : + new BitCastInst(Arg, ParamTy, "", InsertPt); + CallInst *Call = + CallInst::Create(RetainsToMove.IsRetainBlock ? + getRetainBlockCallee(M) : getRetainCallee(M), + MyArg, "", InsertPt); + Call->setDoesNotThrow(); + if (!RetainsToMove.IsRetainBlock) + Call->setTailCall(); + } + for (SmallPtrSet<Instruction *, 2>::const_iterator + PI = RetainsToMove.ReverseInsertPts.begin(), + PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) { + Instruction *LastUse = *PI; + Instruction *InsertPts[] = { 0, 0, 0 }; + if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) { + // We can't insert code immediately after an invoke instruction, so + // insert code at the beginning of both successor blocks instead. + // The invoke's return value isn't available in the unwind block, + // but our releases will never depend on it, because they must be + // paired with retains from before the invoke. + InsertPts[0] = II->getNormalDest()->getFirstInsertionPt(); + InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt(); + } else { + // Insert code immediately after the last use. + InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse)); + } + + for (Instruction **I = InsertPts; *I; ++I) { + Instruction *InsertPt = *I; + Value *MyArg = ArgTy == ParamTy ? Arg : + new BitCastInst(Arg, ParamTy, "", InsertPt); + CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg, + "", InsertPt); + // Attach a clang.imprecise_release metadata tag, if appropriate. + if (MDNode *M = ReleasesToMove.ReleaseMetadata) + Call->setMetadata(ImpreciseReleaseMDKind, M); + Call->setDoesNotThrow(); + if (ReleasesToMove.IsTailCallRelease) + Call->setTailCall(); + } + } + + // Delete the original retain and release calls. 
+ for (SmallPtrSet<Instruction *, 2>::const_iterator + AI = RetainsToMove.Calls.begin(), + AE = RetainsToMove.Calls.end(); AI != AE; ++AI) { + Instruction *OrigRetain = *AI; + Retains.blot(OrigRetain); + DeadInsts.push_back(OrigRetain); + } + for (SmallPtrSet<Instruction *, 2>::const_iterator + AI = ReleasesToMove.Calls.begin(), + AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) { + Instruction *OrigRelease = *AI; + Releases.erase(OrigRelease); + DeadInsts.push_back(OrigRelease); + } +} + +bool +ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> + &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, + Module *M) { + bool AnyPairsCompletelyEliminated = false; + RRInfo RetainsToMove; + RRInfo ReleasesToMove; + SmallVector<Instruction *, 4> NewRetains; + SmallVector<Instruction *, 4> NewReleases; + SmallVector<Instruction *, 8> DeadInsts; + + for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), + E = Retains.end(); I != E; ++I) { + Value *V = I->first; + if (!V) continue; // blotted + + Instruction *Retain = cast<Instruction>(V); + Value *Arg = GetObjCArg(Retain); + + // If the object being released is in static storage, we know it's + // not being managed by ObjC reference counting, so we can delete pairs + // regardless of what possible decrements or uses lie between them. + bool KnownSafe = isa<Constant>(Arg); + + // Same for stack storage, unless this is an objc_retainBlock call, + // which is responsible for copying the block data from the stack to + // the heap. + if (!I->second.IsRetainBlock && isa<AllocaInst>(Arg)) + KnownSafe = true; + + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. 
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>( + StripPointerCastsAndObjCCalls(LI->getPointerOperand()))) + if (GV->isConstant()) + KnownSafe = true; + + // If a pair happens in a region where it is known that the reference count + // is already incremented, we can similarly ignore possible decrements. + bool KnownSafeTD = true, KnownSafeBU = true; + + // Connect the dots between the top-down-collected RetainsToMove and + // bottom-up-collected ReleasesToMove to form sets of related calls. + // This is an iterative process so that we connect multiple releases + // to multiple retains if needed. + unsigned OldDelta = 0; + unsigned NewDelta = 0; + unsigned OldCount = 0; + unsigned NewCount = 0; + bool FirstRelease = true; + bool FirstRetain = true; + NewRetains.push_back(Retain); + for (;;) { + for (SmallVectorImpl<Instruction *>::const_iterator + NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { + Instruction *NewRetain = *NI; + MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain); + assert(It != Retains.end()); + const RRInfo &NewRetainRRI = It->second; + KnownSafeTD &= NewRetainRRI.KnownSafe; + for (SmallPtrSet<Instruction *, 2>::const_iterator + LI = NewRetainRRI.Calls.begin(), + LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { + Instruction *NewRetainRelease = *LI; + DenseMap<Value *, RRInfo>::const_iterator Jt = + Releases.find(NewRetainRelease); + if (Jt == Releases.end()) + goto next_retain; + const RRInfo &NewRetainReleaseRRI = Jt->second; + assert(NewRetainReleaseRRI.Calls.count(NewRetain)); + if (ReleasesToMove.Calls.insert(NewRetainRelease)) { + OldDelta -= + BBStates[NewRetainRelease->getParent()].GetAllPathCount(); + + // Merge the ReleaseMetadata and IsTailCallRelease values. 
+ if (FirstRelease) { + ReleasesToMove.ReleaseMetadata = + NewRetainReleaseRRI.ReleaseMetadata; + ReleasesToMove.IsTailCallRelease = + NewRetainReleaseRRI.IsTailCallRelease; + FirstRelease = false; + } else { + if (ReleasesToMove.ReleaseMetadata != + NewRetainReleaseRRI.ReleaseMetadata) + ReleasesToMove.ReleaseMetadata = 0; + if (ReleasesToMove.IsTailCallRelease != + NewRetainReleaseRRI.IsTailCallRelease) + ReleasesToMove.IsTailCallRelease = false; + } + + // Collect the optimal insertion points. + if (!KnownSafe) + for (SmallPtrSet<Instruction *, 2>::const_iterator + RI = NewRetainReleaseRRI.ReverseInsertPts.begin(), + RE = NewRetainReleaseRRI.ReverseInsertPts.end(); + RI != RE; ++RI) { + Instruction *RIP = *RI; + if (ReleasesToMove.ReverseInsertPts.insert(RIP)) + NewDelta -= BBStates[RIP->getParent()].GetAllPathCount(); + } + NewReleases.push_back(NewRetainRelease); + } + } + } + NewRetains.clear(); + if (NewReleases.empty()) break; + + // Back the other way. + for (SmallVectorImpl<Instruction *>::const_iterator + NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) { + Instruction *NewRelease = *NI; + DenseMap<Value *, RRInfo>::const_iterator It = + Releases.find(NewRelease); + assert(It != Releases.end()); + const RRInfo &NewReleaseRRI = It->second; + KnownSafeBU &= NewReleaseRRI.KnownSafe; + for (SmallPtrSet<Instruction *, 2>::const_iterator + LI = NewReleaseRRI.Calls.begin(), + LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) { + Instruction *NewReleaseRetain = *LI; + MapVector<Value *, RRInfo>::const_iterator Jt = + Retains.find(NewReleaseRetain); + if (Jt == Retains.end()) + goto next_retain; + const RRInfo &NewReleaseRetainRRI = Jt->second; + assert(NewReleaseRetainRRI.Calls.count(NewRelease)); + if (RetainsToMove.Calls.insert(NewReleaseRetain)) { + unsigned PathCount = + BBStates[NewReleaseRetain->getParent()].GetAllPathCount(); + OldDelta += PathCount; + OldCount += PathCount; + + // Merge the IsRetainBlock values. 
+ if (FirstRetain) { + RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; + FirstRetain = false; + } else if (RetainsToMove.IsRetainBlock != + NewReleaseRetainRRI.IsRetainBlock) + // It's not possible to merge the sequences if one uses + // objc_retain and the other uses objc_retainBlock. + // (RetainsToMove.IsRetainBlock holds the kind recorded from the + // first retain of this set.) + goto next_retain; + + // Collect the optimal insertion points. + if (!KnownSafe) + for (SmallPtrSet<Instruction *, 2>::const_iterator + RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), + RE = NewReleaseRetainRRI.ReverseInsertPts.end(); + RI != RE; ++RI) { + Instruction *RIP = *RI; + if (RetainsToMove.ReverseInsertPts.insert(RIP)) { + PathCount = BBStates[RIP->getParent()].GetAllPathCount(); + NewDelta += PathCount; + NewCount += PathCount; + } + } + NewRetains.push_back(NewReleaseRetain); + } + } + } + NewReleases.clear(); + if (NewRetains.empty()) break; + } + + // If the pointer is known incremented or nested, we can safely delete the + // pair regardless of what's between them. + if (KnownSafeTD || KnownSafeBU) { + RetainsToMove.ReverseInsertPts.clear(); + ReleasesToMove.ReverseInsertPts.clear(); + NewCount = 0; + } else { + // Determine whether the new insertion points we computed preserve the + // balance of retain and release calls through the program. + // TODO: If the fully aggressive solution isn't valid, try to find a + // less aggressive solution which is. + if (NewDelta != 0) + goto next_retain; + } + + // Determine whether the original call points are balanced in the retain and + // release calls through the program. If not, conservatively don't touch + // them. + // TODO: It's theoretically possible to do code motion in this case, as + // long as the existing imbalances are maintained. + if (OldDelta != 0) + goto next_retain; + + // Ok, everything checks out and we're all set. Let's move some code! 
+ Changed = true; + AnyPairsCompletelyEliminated = NewCount == 0; + NumRRs += OldCount - NewCount; + MoveCalls(Arg, RetainsToMove, ReleasesToMove, + Retains, Releases, DeadInsts, M); + + next_retain: + NewReleases.clear(); + NewRetains.clear(); + RetainsToMove.clear(); + ReleasesToMove.clear(); + } + + // Now that we're done moving everything, we can delete the newly dead + // instructions, as we no longer need them as insert points. + while (!DeadInsts.empty()) + EraseInstruction(DeadInsts.pop_back_val()); + + return AnyPairsCompletelyEliminated; +} + +/// OptimizeWeakCalls - Weak pointer optimizations. +void ObjCARCOpt::OptimizeWeakCalls(Function &F) { + // First, do memdep-style RLE and S2L optimizations. We can't use memdep + // itself because it uses AliasAnalysis and we need to do provenance + // queries instead. + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + InstructionClass Class = GetBasicInstructionClass(Inst); + if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) + continue; + + // Delete objc_loadWeak calls with no users. + if (Class == IC_LoadWeak && Inst->use_empty()) { + // Record that the function was modified before erasing the call. + Changed = true; + Inst->eraseFromParent(); + continue; + } + + // TODO: For now, just look for an earlier available version of this value + // within the same block. Theoretically, we could do memdep-style non-local + // analysis too, but that would want caching. A better approach would be to + // use the technique that EarlyCSE uses. + inst_iterator Current = llvm::prior(I); + BasicBlock *CurrentBB = Current.getBasicBlockIterator(); + for (BasicBlock::iterator B = CurrentBB->begin(), + J = Current.getInstructionIterator(); + J != B; --J) { + Instruction *EarlierInst = &*llvm::prior(J); + InstructionClass EarlierClass = GetInstructionClass(EarlierInst); + switch (EarlierClass) { + case IC_LoadWeak: + case IC_LoadWeakRetained: { + // If this is loading from the same pointer, replace this load's value + // with that one. 
+ CallInst *Call = cast<CallInst>(Inst); + CallInst *EarlierCall = cast<CallInst>(EarlierInst); + Value *Arg = Call->getArgOperand(0); + Value *EarlierArg = EarlierCall->getArgOperand(0); + switch (PA.getAA()->alias(Arg, EarlierArg)) { + case AliasAnalysis::MustAlias: + Changed = true; + // If the load has a builtin retain, insert a plain retain for it. + if (Class == IC_LoadWeakRetained) { + CallInst *CI = + CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, + "", Call); + CI->setTailCall(); + } + // Zap the fully redundant load. + Call->replaceAllUsesWith(EarlierCall); + Call->eraseFromParent(); + goto clobbered; + case AliasAnalysis::MayAlias: + case AliasAnalysis::PartialAlias: + goto clobbered; + case AliasAnalysis::NoAlias: + break; + } + break; + } + case IC_StoreWeak: + case IC_InitWeak: { + // If this is storing to the same pointer and has the same size etc. + // replace this load's value with the stored value. + CallInst *Call = cast<CallInst>(Inst); + CallInst *EarlierCall = cast<CallInst>(EarlierInst); + Value *Arg = Call->getArgOperand(0); + Value *EarlierArg = EarlierCall->getArgOperand(0); + switch (PA.getAA()->alias(Arg, EarlierArg)) { + case AliasAnalysis::MustAlias: + Changed = true; + // If the load has a builtin retain, insert a plain retain for it. + if (Class == IC_LoadWeakRetained) { + CallInst *CI = + CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, + "", Call); + CI->setTailCall(); + } + // Zap the fully redundant load. + Call->replaceAllUsesWith(EarlierCall->getArgOperand(1)); + Call->eraseFromParent(); + goto clobbered; + case AliasAnalysis::MayAlias: + case AliasAnalysis::PartialAlias: + goto clobbered; + case AliasAnalysis::NoAlias: + break; + } + break; + } + case IC_MoveWeak: + case IC_CopyWeak: + // TODO: Grab the copied value. 
+ goto clobbered; + case IC_AutoreleasepoolPush: + case IC_None: + case IC_User: + // Weak pointers are only modified through the weak entry points + // (and arbitrary calls, which could call the weak entry points). + break; + default: + // Anything else could modify the weak pointer. + goto clobbered; + } + } + clobbered:; + } + + // Then, for each destroyWeak with an alloca operand, check to see if + // the alloca and all its users can be zapped. + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + InstructionClass Class = GetBasicInstructionClass(Inst); + if (Class != IC_DestroyWeak) + continue; + + CallInst *Call = cast<CallInst>(Inst); + Value *Arg = Call->getArgOperand(0); + if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) { + for (Value::use_iterator UI = Alloca->use_begin(), + UE = Alloca->use_end(); UI != UE; ++UI) { + Instruction *UserInst = cast<Instruction>(*UI); + switch (GetBasicInstructionClass(UserInst)) { + case IC_InitWeak: + case IC_StoreWeak: + case IC_DestroyWeak: + continue; + default: + goto done; + } + } + Changed = true; + for (Value::use_iterator UI = Alloca->use_begin(), + UE = Alloca->use_end(); UI != UE; ) { + CallInst *UserInst = cast<CallInst>(*UI++); + if (!UserInst->use_empty()) + UserInst->replaceAllUsesWith(UserInst->getOperand(1)); + UserInst->eraseFromParent(); + } + Alloca->eraseFromParent(); + done:; + } + } +} + +/// OptimizeSequences - Identify program paths which execute sequences of +/// retains and releases which can be eliminated. +bool ObjCARCOpt::OptimizeSequences(Function &F) { + /// Releases, Retains - These are used to store the results of the main flow + /// analysis. These use Value* as the key instead of Instruction* so that the + /// map stays valid when we get around to rewriting code and calls get + /// replaced by arguments. 
+ DenseMap<Value *, RRInfo> Releases; + MapVector<Value *, RRInfo> Retains; + + /// BBStates, This is used during the traversal of the function to track the + /// states for each identified object at each block. + DenseMap<const BasicBlock *, BBState> BBStates; + + // Analyze the CFG of the function, and all instructions. + bool NestingDetected = Visit(F, BBStates, Retains, Releases); + + // Transform. + return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) && + NestingDetected; +} + +/// OptimizeReturns - Look for this pattern: +/// +/// %call = call i8* @something(...) +/// %2 = call i8* @objc_retain(i8* %call) +/// %3 = call i8* @objc_autorelease(i8* %2) +/// ret i8* %3 +/// +/// And delete the retain and autorelease. +/// +/// Otherwise if it's just this: +/// +/// %3 = call i8* @objc_autorelease(i8* %2) +/// ret i8* %3 +/// +/// convert the autorelease to autoreleaseRV. +void ObjCARCOpt::OptimizeReturns(Function &F) { + if (!F.getReturnType()->isPointerTy()) + return; + + SmallPtrSet<Instruction *, 4> DependingInstructions; + SmallPtrSet<const BasicBlock *, 4> Visited; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + BasicBlock *BB = FI; + ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back()); + if (!Ret) continue; + + const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0)); + FindDependencies(NeedsPositiveRetainCount, Arg, + BB, Ret, DependingInstructions, Visited, PA); + if (DependingInstructions.size() != 1) + goto next_block; + + { + CallInst *Autorelease = + dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); + if (!Autorelease) + goto next_block; + InstructionClass AutoreleaseClass = + GetBasicInstructionClass(Autorelease); + if (!IsAutorelease(AutoreleaseClass)) + goto next_block; + if (GetObjCArg(Autorelease) != Arg) + goto next_block; + + DependingInstructions.clear(); + Visited.clear(); + + // Check that there is nothing that can affect the reference + // count between the autorelease 
and the retain. + FindDependencies(CanChangeRetainCount, Arg, + BB, Autorelease, DependingInstructions, Visited, PA); + if (DependingInstructions.size() != 1) + goto next_block; + + { + CallInst *Retain = + dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); + + // Check that we found a retain with the same argument. + if (!Retain || + !IsRetain(GetBasicInstructionClass(Retain)) || + GetObjCArg(Retain) != Arg) + goto next_block; + + DependingInstructions.clear(); + Visited.clear(); + + // Convert the autorelease to an autoreleaseRV, since it's + // returning the value. + if (AutoreleaseClass == IC_Autorelease) { + Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent())); + AutoreleaseClass = IC_AutoreleaseRV; + } + + // Check that there is nothing that can affect the reference + // count between the retain and the call. + // Note that Retain need not be in BB. + FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain, + DependingInstructions, Visited, PA); + if (DependingInstructions.size() != 1) + goto next_block; + + { + CallInst *Call = + dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); + + // Check that the pointer is the return value of the call. + if (!Call || Arg != Call) + goto next_block; + + // Check that the call is a regular call. + InstructionClass Class = GetBasicInstructionClass(Call); + if (Class != IC_CallOrUser && Class != IC_Call) + goto next_block; + + // If so, we can zap the retain and autorelease. + Changed = true; + ++NumRets; + EraseInstruction(Retain); + EraseInstruction(Autorelease); + } + } + } + + next_block: + DependingInstructions.clear(); + Visited.clear(); + } +} + +bool ObjCARCOpt::doInitialization(Module &M) { + if (!EnableARCOpts) + return false; + + Run = ModuleHasARC(M); + if (!Run) + return false; + + // Identify the imprecise release metadata kind. 
+ ImpreciseReleaseMDKind = + M.getContext().getMDKindID("clang.imprecise_release"); + + // Intuitively, objc_retain and others are nocapture, however in practice + // they are not, because they return their argument value. And objc_release + // calls finalizers. + + // These are initialized lazily. + RetainRVCallee = 0; + AutoreleaseRVCallee = 0; + ReleaseCallee = 0; + RetainCallee = 0; + RetainBlockCallee = 0; + AutoreleaseCallee = 0; + + return false; +} + +bool ObjCARCOpt::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!Run) + return false; + + Changed = false; + + PA.setAA(&getAnalysis<AliasAnalysis>()); + + // This pass performs several distinct transformations. As a compile-time aid + // when compiling code that isn't ObjC, skip these if the relevant ObjC + // library functions aren't declared. + + // Preliminary optimizations. This also computes UsedInThisFunction. + OptimizeIndividualCalls(F); + + // Optimizations for weak pointers. + if (UsedInThisFunction & ((1 << IC_LoadWeak) | + (1 << IC_LoadWeakRetained) | + (1 << IC_StoreWeak) | + (1 << IC_InitWeak) | + (1 << IC_CopyWeak) | + (1 << IC_MoveWeak) | + (1 << IC_DestroyWeak))) + OptimizeWeakCalls(F); + + // Optimizations for retain+release pairs. + if (UsedInThisFunction & ((1 << IC_Retain) | + (1 << IC_RetainRV) | + (1 << IC_RetainBlock))) + if (UsedInThisFunction & (1 << IC_Release)) + // Run OptimizeSequences until it either stops making changes or + // no retain+release pair nesting is detected. + while (OptimizeSequences(F)) {} + + // Optimizations if objc_autorelease is used. + if (UsedInThisFunction & + ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV))) + OptimizeReturns(F); + + return Changed; +} + +void ObjCARCOpt::releaseMemory() { + PA.clear(); +} + +//===----------------------------------------------------------------------===// +// ARC contraction. 
+//===----------------------------------------------------------------------===// + +// TODO: ObjCARCContract could insert PHI nodes when uses aren't +// dominated by single calls. + +#include "llvm/Operator.h" +#include "llvm/InlineAsm.h" +#include "llvm/Analysis/Dominators.h" + +STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); + +namespace { + /// ObjCARCContract - Late ARC optimizations. These change the IR in a way + /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late. + class ObjCARCContract : public FunctionPass { + bool Changed; + AliasAnalysis *AA; + DominatorTree *DT; + ProvenanceAnalysis PA; + + /// Run - A flag indicating whether this optimization pass should run. + bool Run; + + /// StoreStrongCallee, etc. - Declarations for ObjC runtime + /// functions, for use in creating calls to them. These are initialized + /// lazily to avoid cluttering up the Module with unused declarations. + Constant *StoreStrongCallee, + *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee; + + /// RetainRVMarker - The inline asm string to insert between calls and + /// RetainRV calls to make the optimization work on targets which need it. 
+ const MDString *RetainRVMarker; + + Constant *getStoreStrongCallee(Module *M); + Constant *getRetainAutoreleaseCallee(Module *M); + Constant *getRetainAutoreleaseRVCallee(Module *M); + + bool ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet<Instruction *, 4> + &DependingInstructions, + SmallPtrSet<const BasicBlock *, 4> + &Visited); + + void ContractRelease(Instruction *Release, + inst_iterator &Iter); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + public: + static char ID; + ObjCARCContract() : FunctionPass(ID) { + initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCContract::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) + +Pass *llvm::createObjCARCContractPass() { + return new ObjCARCContract(); +} + +void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); +} + +Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { + if (!StoreStrongCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + std::vector<Type *> Params; + Params.push_back(I8XX); + Params.push_back(I8X); + + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + Attributes.addAttr(1, Attribute::NoCapture); + + StoreStrongCallee = + M->getOrInsertFunction( + "objc_storeStrong", + FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), + Attributes); + } + return StoreStrongCallee; +} + +Constant 
*ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { + if (!RetainAutoreleaseCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainAutoreleaseCallee = + M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes); + } + return RetainAutoreleaseCallee; +} + +Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { + if (!RetainAutoreleaseRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainAutoreleaseRVCallee = + M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, + Attributes); + } + return RetainAutoreleaseRVCallee; +} + +/// ContractAutorelease - Merge an autorelease with a retain into a fused +/// call. +bool +ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet<Instruction *, 4> + &DependingInstructions, + SmallPtrSet<const BasicBlock *, 4> + &Visited) { + const Value *Arg = GetObjCArg(Autorelease); + + // Check that there are no instructions between the retain and the autorelease + // (such as an autorelease_pop) which may change the count. 
+ CallInst *Retain = 0; + if (Class == IC_AutoreleaseRV) + FindDependencies(RetainAutoreleaseRVDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + else + FindDependencies(RetainAutoreleaseDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + + Visited.clear(); + if (DependingInstructions.size() != 1) { + DependingInstructions.clear(); + return false; + } + + Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); + DependingInstructions.clear(); + + if (!Retain || + GetBasicInstructionClass(Retain) != IC_Retain || + GetObjCArg(Retain) != Arg) + return false; + + Changed = true; + ++NumPeeps; + + if (Class == IC_AutoreleaseRV) + Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); + else + Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); + + EraseInstruction(Autorelease); + return true; +} + +/// ContractRelease - Attempt to merge an objc_release with a store, load, and +/// objc_retain to form an objc_storeStrong. This can be a little tricky because +/// the instructions don't always appear in order, and there may be unrelated +/// intervening instructions. +void ObjCARCContract::ContractRelease(Instruction *Release, + inst_iterator &Iter) { + LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); + if (!Load || !Load->isSimple()) return; + + // For now, require everything to be in one basic block. + BasicBlock *BB = Release->getParent(); + if (Load->getParent() != BB) return; + + // Walk down to find the store. 
+ BasicBlock::iterator I = Load, End = BB->end(); + ++I; + AliasAnalysis::Location Loc = AA->getLocation(Load); + while (I != End && + (&*I == Release || + IsRetain(GetBasicInstructionClass(I)) || + !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod))) + ++I; + StoreInst *Store = dyn_cast<StoreInst>(I); + if (!Store || !Store->isSimple()) return; + if (Store->getPointerOperand() != Loc.Ptr) return; + + Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); + + // Walk up to find the retain. + I = Store; + BasicBlock::iterator Begin = BB->begin(); + while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) + --I; + Instruction *Retain = I; + if (GetBasicInstructionClass(Retain) != IC_Retain) return; + if (GetObjCArg(Retain) != New) return; + + Changed = true; + ++NumStoreStrongs; + + LLVMContext &C = Release->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + + Value *Args[] = { Load->getPointerOperand(), New }; + if (Args[0]->getType() != I8XX) + Args[0] = new BitCastInst(Args[0], I8XX, "", Store); + if (Args[1]->getType() != I8X) + Args[1] = new BitCastInst(Args[1], I8X, "", Store); + CallInst *StoreStrong = + CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), + Args, "", Store); + StoreStrong->setDoesNotThrow(); + StoreStrong->setDebugLoc(Store->getDebugLoc()); + + if (&*Iter == Store) ++Iter; + Store->eraseFromParent(); + Release->eraseFromParent(); + EraseInstruction(Retain); + if (Load->use_empty()) + Load->eraseFromParent(); +} + +bool ObjCARCContract::doInitialization(Module &M) { + Run = ModuleHasARC(M); + if (!Run) + return false; + + // These are initialized lazily. + StoreStrongCallee = 0; + RetainAutoreleaseCallee = 0; + RetainAutoreleaseRVCallee = 0; + + // Initialize RetainRVMarker. 
+ RetainRVMarker = 0; + if (NamedMDNode *NMD = + M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) + if (NMD->getNumOperands() == 1) { + const MDNode *N = NMD->getOperand(0); + if (N->getNumOperands() == 1) + if (const MDString *S = dyn_cast<MDString>(N->getOperand(0))) + RetainRVMarker = S; + } + + return false; +} + +bool ObjCARCContract::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!Run) + return false; + + Changed = false; + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + + PA.setAA(&getAnalysis<AliasAnalysis>()); + + // For ObjC library calls which return their argument, replace uses of the + // argument with uses of the call return value, if it dominates the use. This + // reduces register pressure. + SmallPtrSet<Instruction *, 4> DependingInstructions; + SmallPtrSet<const BasicBlock *, 4> Visited; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + // Only these library routines return their argument. In particular, + // objc_retainBlock does not necessarily return its argument. + InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + break; + case IC_Autorelease: + case IC_AutoreleaseRV: + if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) + continue; + break; + case IC_RetainRV: { + // If we're compiling for a target which needs a special inline-asm + // marker to do the retainAutoreleasedReturnValue optimization, + // insert it now. 
+ if (!RetainRVMarker) + break; + BasicBlock::iterator BBI = Inst; + --BBI; + while (isNoopInstruction(BBI)) --BBI; + if (&*BBI == GetObjCArg(Inst)) { + InlineAsm *IA = + InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), + /*isVarArg=*/false), + RetainRVMarker->getString(), + /*Constraints=*/"", /*hasSideEffects=*/true); + CallInst::Create(IA, "", Inst); + } + break; + } + case IC_InitWeak: { + // objc_initWeak(p, null) => *p = null + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(1))) { + Value *Null = + ConstantPointerNull::get(cast<PointerType>(CI->getType())); + Changed = true; + new StoreInst(Null, CI->getArgOperand(0), CI); + CI->replaceAllUsesWith(Null); + CI->eraseFromParent(); + } + continue; + } + case IC_Release: + ContractRelease(Inst, I); + continue; + default: + continue; + } + + // Don't use GetObjCArg because we don't want to look through bitcasts + // and such; to do the replacement, the argument must have type i8*. + const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0); + for (;;) { + // If we're compiling bugpointed code, don't get in trouble. + if (!isa<Instruction>(Arg) && !isa<Argument>(Arg)) + break; + // Look through the uses of the pointer. + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ) { + Use &U = UI.getUse(); + unsigned OperandNo = UI.getOperandNo(); + ++UI; // Increment UI now, because we may unlink its element. + if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser())) + if (Inst != UserInst && DT->dominates(Inst, UserInst)) { + Changed = true; + Instruction *Replacement = Inst; + Type *UseTy = U.get()->getType(); + if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) { + // For PHI nodes, insert the bitcast in the predecessor block. 
+ unsigned ValNo = + PHINode::getIncomingValueNumForOperand(OperandNo); + BasicBlock *BB = + PHI->getIncomingBlock(ValNo); + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + &BB->back()); + for (unsigned i = 0, e = PHI->getNumIncomingValues(); + i != e; ++i) + if (PHI->getIncomingBlock(i) == BB) { + // Keep the UI iterator valid. + if (&PHI->getOperandUse( + PHINode::getOperandNumForIncomingValue(i)) == + &UI.getUse()) + ++UI; + PHI->setIncomingValue(i, Replacement); + } + } else { + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", UserInst); + U.set(Replacement); + } + } + } + + // If Arg is a no-op casted pointer, strip one level of casts and + // iterate. + if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg)) + Arg = BI->getOperand(0); + else if (isa<GEPOperator>(Arg) && + cast<GEPOperator>(Arg)->hasAllZeroIndices()) + Arg = cast<GEPOperator>(Arg)->getPointerOperand(); + else if (isa<GlobalAlias>(Arg) && + !cast<GlobalAlias>(Arg)->mayBeOverridden()) + Arg = cast<GlobalAlias>(Arg)->getAliasee(); + else + break; + } + } + + return Changed; +}
diff --git a/src/LLVM/lib/Transforms/Scalar/PACKAGE.vcxproj b/src/LLVM/lib/Transforms/Scalar/PACKAGE.vcxproj new file mode 100644 index 0000000..bf77a09 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/PACKAGE.vcxproj
@@ -0,0 +1,277 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>PACKAGE</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + 
</PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + 
</ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Scalar;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Scalar/PACKAGE.vcxproj.filters b/src/LLVM/lib/Transforms/Scalar/PACKAGE.vcxproj.filters new file mode 100644 index 0000000..a570359 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Scalar/Reassociate.cpp b/src/LLVM/lib/Transforms/Scalar/Reassociate.cpp index 89a2709..8f98a5b 100644 --- a/src/LLVM/lib/Transforms/Scalar/Reassociate.cpp +++ b/src/LLVM/lib/Transforms/Scalar/Reassociate.cpp
@@ -22,6 +22,7 @@ #define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -74,10 +75,14 @@ class Reassociate : public FunctionPass { DenseMap<BasicBlock*, unsigned> RankMap; DenseMap<AssertingVH<>, unsigned> ValueRankMap; + SmallVector<WeakVH, 8> RedoInsts; + SmallVector<WeakVH, 8> DeadInsts; bool MadeChange; public: static char ID; // Pass identification, replacement for typeid - Reassociate() : FunctionPass(ID) {} + Reassociate() : FunctionPass(ID) { + initializeReassociatePass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &F); @@ -96,7 +101,7 @@ void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); void LinearizeExpr(BinaryOperator *I); Value *RemoveFactorFromExpression(Value *V, Value *Factor); - void ReassociateBB(BasicBlock *BB); + void ReassociateInst(BasicBlock::iterator &BBI); void RemoveDeadBinaryOp(Value *V); }; @@ -104,20 +109,20 @@ char Reassociate::ID = 0; INITIALIZE_PASS(Reassociate, "reassociate", - "Reassociate expressions", false, false); + "Reassociate expressions", false, false) // Public interface to the Reassociate pass FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } void Reassociate::RemoveDeadBinaryOp(Value *V) { Instruction *Op = dyn_cast<Instruction>(V); - if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty()) + if (!Op || !isa<BinaryOperator>(Op)) return; Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); ValueRankMap.erase(Op); - Op->eraseFromParent(); + DeadInsts.push_back(Op); RemoveDeadBinaryOp(LHS); RemoveDeadBinaryOp(RHS); } @@ -127,7 +132,9 @@ if (I->getOpcode() == Instruction::PHI || I->getOpcode() == Instruction::Alloca || I->getOpcode() == Instruction::Load || - I->getOpcode() == Instruction::Call || + I->getOpcode() == Instruction::Invoke || + (I->getOpcode() == Instruction::Call && + 
!isa<DbgInfoIntrinsic>(I)) || I->getOpcode() == Instruction::UDiv || I->getOpcode() == Instruction::SDiv || I->getOpcode() == Instruction::FDiv || @@ -206,10 +213,11 @@ DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { Constant *Cst = Constant::getAllOnesValue(Neg->getType()); - Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, Neg); + Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); ValueRankMap.erase(Neg); Res->takeName(Neg); Neg->replaceAllUsesWith(Res); + Res->setDebugLoc(Neg->getDebugLoc()); Neg->eraseFromParent(); return Res; } @@ -236,6 +244,12 @@ RHS->setOperand(0, LHS); I->setOperand(0, RHS); + // Conservatively clear all the optional flags, which may not hold + // after the reassociation. + I->clearSubclassOptionalData(); + LHS->clearSubclassOptionalData(); + RHS->clearSubclassOptionalData(); + ++NumLinear; MadeChange = true; DEBUG(dbgs() << "Linearized: " << *I << '\n'); @@ -295,7 +309,7 @@ std::swap(LHS, RHS); bool Success = !I->swapOperands(); assert(Success && "swapOperands failed"); - Success = false; + (void)Success; MadeChange = true; } else if (RHSBO) { // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not @@ -337,6 +351,12 @@ DEBUG(dbgs() << "RA: " << *I << '\n'); I->setOperand(0, Ops[i].Op); I->setOperand(1, Ops[i+1].Op); + + // Clear all the optional flags, which may not hold after the + // reassociation if the expression involved more than just this operation. + if (Ops.size() != 2) + I->clearSubclassOptionalData(); + DEBUG(dbgs() << "TO: " << *I << '\n'); MadeChange = true; ++NumChanged; @@ -352,6 +372,11 @@ if (I->getOperand(1) != Ops[i].Op) { DEBUG(dbgs() << "RA: " << *I << '\n'); I->setOperand(1, Ops[i].Op); + + // Conservatively clear all the optional flags, which may not hold + // after the reassociation. 
+ I->clearSubclassOptionalData(); + DEBUG(dbgs() << "TO: " << *I << '\n'); MadeChange = true; ++NumChanged; @@ -421,7 +446,9 @@ BasicBlock::iterator InsertPt; if (Instruction *InstInput = dyn_cast<Instruction>(V)) { - { + if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) { + InsertPt = II->getNormalDest()->begin(); + } else { InsertPt = InstInput; ++InsertPt; } @@ -435,7 +462,7 @@ // Insert a 'neg' instruction that subtracts the value from zero to get the // negation. - return BinaryOperator::CreateNeg(V, BI); + return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI); } /// ShouldBreakUpSubtract - Return true if we should break up this subtract of @@ -474,12 +501,13 @@ // Value *NegVal = NegateValue(Sub->getOperand(1), Sub); Instruction *New = - BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, Sub); + BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); New->takeName(Sub); // Everyone now refers to the add instruction. ValueRankMap.erase(Sub); Sub->replaceAllUsesWith(New); + New->setDebugLoc(Sub->getDebugLoc()); Sub->eraseFromParent(); DEBUG(dbgs() << "Negated: " << *New << '\n'); @@ -501,10 +529,11 @@ MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1))); Instruction *Mul = - BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, Shl); + BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); ValueRankMap.erase(Shl); Mul->takeName(Shl); Shl->replaceAllUsesWith(Mul); + Mul->setDebugLoc(Shl->getDebugLoc()); Shl->eraseFromParent(); return Mul; } @@ -536,7 +565,7 @@ Value *V1 = Ops.back(); Ops.pop_back(); Value *V2 = EmitAddTreeOfValues(I, Ops); - return BinaryOperator::CreateAdd(V2, V1, I); + return BinaryOperator::CreateAdd(V2, V1, "tmp", I); } /// RemoveFactorFromExpression - If V is an expression tree that is a @@ -580,7 +609,7 @@ // remaining operand. 
if (Factors.size() == 1) { ValueRankMap.erase(BO); - BO->eraseFromParent(); + DeadInsts.push_back(BO); V = Factors[0].Op; } else { RewriteExprTree(BO, Factors); @@ -588,7 +617,7 @@ } if (NeedsNegate) - V = BinaryOperator::CreateNeg(V, InsertPt); + V = BinaryOperator::CreateNeg(V, "neg", InsertPt); return V; } @@ -704,12 +733,12 @@ // Insert a new multiply. Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound); - Mul = BinaryOperator::CreateMul(TheOp, Mul, I); + Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I); // Now that we have inserted a multiply, optimize it. This allows us to // handle cases that require multiple factoring steps, such as this: // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 - Mul = ReassociateExpression(cast<BinaryOperator>(Mul)); + RedoInsts.push_back(Mul); // If every add operand was a duplicate, return the multiply. if (Ops.empty()) @@ -783,7 +812,7 @@ // because we can percolate the negate out. Watch for minint, which // cannot be positivified. if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) - if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) { + if (CI->isNegative() && !CI->isMinValue(true)) { Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); assert(!Duplicates.count(Factor) && "Shouldn't have two constant factors, missed a canonicalize"); @@ -805,16 +834,23 @@ // RemoveFactorFromExpression on successive values to behave differently. Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); SmallVector<Value*, 4> NewMulOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. 
BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) continue; if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { - NewMulOps.push_back(V); - Ops.erase(Ops.begin()+i); - --i; --e; + // The factorized operand may occur several times. Convert them all in + // one fell swoop. + for (unsigned j = Ops.size(); j != i;) { + --j; + if (Ops[j].Op == Ops[i].Op) { + NewMulOps.push_back(V); + Ops.erase(Ops.begin()+j); + } + } + --i; } } @@ -832,7 +868,7 @@ V = ReassociateExpression(cast<BinaryOperator>(V)); // Create the multiply. - Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, I); + Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I); // Rerun associate on the multiply in case the inner expression turned into // a multiply. We want to make sure that we keep things in canonical form. @@ -930,71 +966,69 @@ } -/// ReassociateBB - Inspect all of the instructions in this basic block, -/// reassociating them as we go. -void Reassociate::ReassociateBB(BasicBlock *BB) { - for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) { - Instruction *BI = BBI++; - if (BI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { - MadeChange = true; - BI = NI; - } - - // Reject cases where it is pointless to do this. - if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || - BI->getType()->isVectorTy()) - continue; // Floating point ops are not associative. - - // Do not reassociate boolean (i1) expressions. We want to preserve the - // original order of evaluation for short-circuited comparisons that - // SimplifyCFG has folded to AND/OR expressions. If the expression - // is not further optimized, it is likely to be transformed back to a - // short-circuited form for code gen, and the source order may have been - // optimized for the most likely conditions. 
- if (BI->getType()->isIntegerTy(1)) - continue; - - // If this is a subtract instruction which is not already in negate form, - // see if we can convert it to X+-Y. - if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(BI)) { - BI = BreakUpSubtract(BI, ValueRankMap); - // Reset the BBI iterator in case BreakUpSubtract changed the - // instruction it points to. - BBI = BI; - ++BBI; - MadeChange = true; - } else if (BinaryOperator::isNeg(BI)) { - // Otherwise, this is a negation. See if the operand is a multiply tree - // and if this is not an inner node of a multiply tree. - if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && - (!BI->hasOneUse() || - !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap); - MadeChange = true; - } - } +/// ReassociateInst - Inspect and reassociate the instruction at the +/// given position, post-incrementing the position. +void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { + Instruction *BI = BBI++; + if (BI->getOpcode() == Instruction::Shl && + isa<ConstantInt>(BI->getOperand(1))) + if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { + MadeChange = true; + BI = NI; } - // If this instruction is a commutative binary operator, process it. - if (!BI->isAssociative()) continue; - BinaryOperator *I = cast<BinaryOperator>(BI); + // Reject cases where it is pointless to do this. + if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || + BI->getType()->isVectorTy()) + return; // Floating point ops are not associative. - // If this is an interior node of a reassociable tree, ignore it until we - // get to the root of the tree, to avoid N^2 analysis. - if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) - continue; + // Do not reassociate boolean (i1) expressions. We want to preserve the + // original order of evaluation for short-circuited comparisons that + // SimplifyCFG has folded to AND/OR expressions. 
If the expression + // is not further optimized, it is likely to be transformed back to a + // short-circuited form for code gen, and the source order may have been + // optimized for the most likely conditions. + if (BI->getType()->isIntegerTy(1)) + return; - // If this is an add tree that is used by a sub instruction, ignore it - // until we process the subtract. - if (I->hasOneUse() && I->getOpcode() == Instruction::Add && - cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) - continue; - - ReassociateExpression(I); + // If this is a subtract instruction which is not already in negate form, + // see if we can convert it to X+-Y. + if (BI->getOpcode() == Instruction::Sub) { + if (ShouldBreakUpSubtract(BI)) { + BI = BreakUpSubtract(BI, ValueRankMap); + // Reset the BBI iterator in case BreakUpSubtract changed the + // instruction it points to. + BBI = BI; + ++BBI; + MadeChange = true; + } else if (BinaryOperator::isNeg(BI)) { + // Otherwise, this is a negation. See if the operand is a multiply tree + // and if this is not an inner node of a multiply tree. + if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && + (!BI->hasOneUse() || + !isReassociableOp(BI->use_back(), Instruction::Mul))) { + BI = LowerNegateToMultiply(BI, ValueRankMap); + MadeChange = true; + } + } } + + // If this instruction is a commutative binary operator, process it. + if (!BI->isAssociative()) return; + BinaryOperator *I = cast<BinaryOperator>(BI); + + // If this is an interior node of a reassociable tree, ignore it until we + // get to the root of the tree, to avoid N^2 analysis. + if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) + return; + + // If this is an add tree that is used by a sub instruction, ignore it + // until we process the subtract. 
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Add && + cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) + return; + + ReassociateExpression(I); } Value *Reassociate::ReassociateExpression(BinaryOperator *I) { @@ -1021,6 +1055,8 @@ // eliminate it. DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); I->replaceAllUsesWith(V); + if (Instruction *VI = dyn_cast<Instruction>(V)) + VI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); ++NumAnnihil; return V; @@ -1044,6 +1080,8 @@ // This expression tree simplified to something that isn't a tree, // eliminate it. I->replaceAllUsesWith(Ops[0].Op); + if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op)) + OI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); return Ops[0].Op; } @@ -1061,7 +1099,21 @@ MadeChange = false; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - ReassociateBB(FI); + for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ) + ReassociateInst(BBI); + + // Now that we're done, revisit any instructions which are likely to + // have secondary reassociation opportunities. + while (!RedoInsts.empty()) + if (Value *V = RedoInsts.pop_back_val()) { + BasicBlock::iterator BBI = cast<Instruction>(V); + ReassociateInst(BBI); + } + + // Now that we're done, delete any instructions which are no longer used. + while (!DeadInsts.empty()) + if (Value *V = DeadInsts.pop_back_val()) + RecursivelyDeleteTriviallyDeadInstructions(V); // We are done with the rank map. RankMap.clear();
diff --git a/src/LLVM/lib/Transforms/Scalar/Reg2Mem.cpp b/src/LLVM/lib/Transforms/Scalar/Reg2Mem.cpp new file mode 100644 index 0000000..47afc77 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -0,0 +1,134 @@ +//===- Reg2Mem.cpp - Convert registers to allocas -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file demotes all registers to memory references. It is intented to be +// the inverse of PromoteMemoryToRegister. By converting to loads, the only +// values live across basic blocks are allocas and loads before phi nodes. +// It is intended that this should make CFG hacking much easier. +// To make later hacking easier, the entry block is split into two, such that +// all introduced allocas and nothing else are in the entry block. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "reg2mem" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/BasicBlock.h" +#include "llvm/Instructions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" +#include <list> +using namespace llvm; + +STATISTIC(NumRegsDemoted, "Number of registers demoted"); +STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); + +namespace { + struct RegToMem : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + RegToMem() : FunctionPass(ID) { + initializeRegToMemPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(BreakCriticalEdgesID); + AU.addPreservedID(BreakCriticalEdgesID); + } + + bool valueEscapes(const Instruction *Inst) const { + const BasicBlock *BB = Inst->getParent(); + for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end(); + UI != E; ++UI) { + const Instruction *I = 
cast<Instruction>(*UI); + if (I->getParent() != BB || isa<PHINode>(I)) + return true; + } + return false; + } + + virtual bool runOnFunction(Function &F); + }; +} + +char RegToMem::ID = 0; +INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots", + false, false) +INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) +INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots", + false, false) + +bool RegToMem::runOnFunction(Function &F) { + if (F.isDeclaration()) + return false; + + // Insert all new allocas into entry block. + BasicBlock *BBEntry = &F.getEntryBlock(); + assert(pred_begin(BBEntry) == pred_end(BBEntry) && + "Entry block to function must not have predecessors!"); + + // Find first non-alloca instruction and create insertion point. This is + // safe if block is well-formed: it always have terminator, otherwise + // we'll get and assertion. + BasicBlock::iterator I = BBEntry->begin(); + while (isa<AllocaInst>(I)) ++I; + + CastInst *AllocaInsertionPoint = + new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())), + Type::getInt32Ty(F.getContext()), + "reg2mem alloca point", I); + + // Find the escaped instructions. But don't create stack slots for + // allocas in entry block. 
+ std::list<Instruction*> WorkList; + for (Function::iterator ibb = F.begin(), ibe = F.end(); + ibb != ibe; ++ibb) + for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); + iib != iie; ++iib) { + if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) && + valueEscapes(iib)) { + WorkList.push_front(&*iib); + } + } + + // Demote escaped instructions + NumRegsDemoted += WorkList.size(); + for (std::list<Instruction*>::iterator ilb = WorkList.begin(), + ile = WorkList.end(); ilb != ile; ++ilb) + DemoteRegToStack(**ilb, false, AllocaInsertionPoint); + + WorkList.clear(); + + // Find all phi's + for (Function::iterator ibb = F.begin(), ibe = F.end(); + ibb != ibe; ++ibb) + for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); + iib != iie; ++iib) + if (isa<PHINode>(iib)) + WorkList.push_front(&*iib); + + // Demote phi nodes + NumPhisDemoted += WorkList.size(); + for (std::list<Instruction*>::iterator ilb = WorkList.begin(), + ile = WorkList.end(); ilb != ile; ++ilb) + DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint); + + return true; +} + + +// createDemoteRegisterToMemory - Provide an entry point to create this pass. +// +char &llvm::DemoteRegisterToMemoryID = RegToMem::ID; +FunctionPass *llvm::createDemoteRegisterToMemoryPass() { + return new RegToMem(); +}
diff --git a/src/LLVM/lib/Transforms/Scalar/SCCP.cpp b/src/LLVM/lib/Transforms/Scalar/SCCP.cpp index 576fb1a..196a847 100644 --- a/src/LLVM/lib/Transforms/Scalar/SCCP.cpp +++ b/src/LLVM/lib/Transforms/Scalar/SCCP.cpp
@@ -156,7 +156,7 @@ /// class SCCPSolver : public InstVisitor<SCCPSolver> { const TargetData *TD; - SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable. + SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable. DenseMap<Value*, LatticeVal> ValueState; // The state each value is in. /// StructValueState - This maintains ValueState for values that have @@ -241,7 +241,7 @@ /// this method must be called. void AddTrackedFunction(Function *F) { // Add an entry, F -> undef. - if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) { + if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) { MRVFunctionsTracked.insert(F); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i), @@ -275,12 +275,12 @@ return I->second; } - LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { + /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I = StructValueState.find(std::make_pair(V, i)); assert(I != StructValueState.end() && "V is not in valuemap!"); return I->second; - } + }*/ /// getTrackedRetVals - Get the inferred return value map. /// @@ -302,7 +302,7 @@ /// markAnythingOverdefined - Mark the specified value overdefined. This /// works with both scalars and structs. void markAnythingOverdefined(Value *V) { - if (const StructType *STy = dyn_cast<StructType>(V->getType())) + if (StructType *STy = dyn_cast<StructType>(V->getType())) for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) markOverdefined(getStructValueState(V, i), V); else @@ -417,7 +417,7 @@ else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) LV.markConstant(CS->getOperand(i)); // Constants are constant. 
else if (isa<ConstantAggregateZero>(C)) { - const Type *FieldTy = cast<StructType>(V->getType())->getElementType(i); + Type *FieldTy = cast<StructType>(V->getType())->getElementType(i); LV.markConstant(Constant::getNullValue(FieldTy)); } else LV.markOverdefined(); // Unknown sort of constant. @@ -471,9 +471,9 @@ /// UsersOfOverdefinedPHIs map for PN, remove them now. void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) { if (UsersOfOverdefinedPHIs.empty()) return; - std::multimap<PHINode*, Instruction*>::iterator It, E; - tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN); - while (It != E) { + typedef std::multimap<PHINode*, Instruction*>::iterator ItTy; + std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN); + for (ItTy It = Range.first, E = Range.second; It != E;) { if (It->second == I) UsersOfOverdefinedPHIs.erase(It++); else @@ -481,6 +481,19 @@ } } + /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIS + /// map for I and PN, but if one is there already, do not create another. + /// (Duplicate entries do not break anything directly, but can lead to + /// exponential growth of the table in rare cases.) + void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) { + typedef std::multimap<PHINode*, Instruction*>::iterator ItTy; + std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN); + for (ItTy J = Range.first, E = Range.second; J != E; ++J) + if (J->second == I) + return; + UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I)); + } + private: friend class InstVisitor<SCCPSolver>; @@ -502,6 +515,7 @@ void visitShuffleVectorInst(ShuffleVectorInst &I); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); + void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); } // Instructions that cannot be folded away. 
void visitStoreInst (StoreInst &I); @@ -510,10 +524,18 @@ void visitCallInst (CallInst &I) { visitCallSite(&I); } + void visitInvokeInst (InvokeInst &II) { + visitCallSite(&II); + visitTerminatorInst(II); + } void visitCallSite (CallSite CS); + void visitResumeInst (TerminatorInst &I) { /*returns void*/ } + void visitUnwindInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } + void visitFenceInst (FenceInst &I) { /*returns void*/ } + void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); } + void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); } void visitAllocaInst (Instruction &I) { markOverdefined(&I); } - void visitVANextInst (Instruction &I) { markOverdefined(&I); } void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); } void visitInstruction(Instruction &I) { @@ -553,7 +575,17 @@ return; } + if (isa<InvokeInst>(TI)) { + // Invoke instructions successors are always executable. + Succs[0] = Succs[1] = true; + return; + } + if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) { + if (TI.getNumSuccessors() < 2) { + Succs[0] = true; + return; + } LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); @@ -609,7 +641,14 @@ return BI->getSuccessor(CI->isZero()) == To; } + // Invoke instructions successors are always executable. + if (isa<InvokeInst>(TI)) + return true; + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + if (SI->getNumSuccessors() < 2) + return true; + LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); @@ -628,7 +667,7 @@ // Just mark all destinations executable! // TODO: This could be improved if the operand is a [cast of a] BlockAddress. - if (isa<IndirectBrInst>(&TI)) + if (isa<IndirectBrInst>(TI)) return true; #ifndef NDEBUG @@ -665,13 +704,14 @@ // There may be instructions using this PHI node that are not overdefined // themselves. 
If so, make sure that they know that the PHI node operand // changed. - std::multimap<PHINode*, Instruction*>::iterator I, E; - tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN); - if (I == E) + typedef std::multimap<PHINode*, Instruction*>::iterator ItTy; + std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(&PN); + + if (Range.first == Range.second) return; SmallVector<Instruction*, 16> Users; - for (; I != E; ++I) + for (ItTy I = Range.first, E = Range.second; I != E; ++I) Users.push_back(I->second); while (!Users.empty()) visit(Users.pop_back_val()); @@ -745,7 +785,7 @@ // Handle functions that return multiple values. if (!TrackedMultipleRetVals.empty()) { - if (const StructType *STy = dyn_cast<StructType>(ResultOp->getType())) + if (StructType *STy = dyn_cast<StructType>(ResultOp->getType())) if (MRVFunctionsTracked.count(F)) for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F, @@ -798,7 +838,7 @@ } void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { - const StructType *STy = dyn_cast<StructType>(IVI.getType()); + StructType *STy = dyn_cast<StructType>(IVI.getType()); if (STy == 0) return markOverdefined(&IVI); @@ -898,7 +938,7 @@ // Could annihilate value. if (I.getOpcode() == Instruction::And) markConstant(IV, &I, Constant::getNullValue(I.getType())); - else if (const VectorType *PT = dyn_cast<VectorType>(I.getType())) + else if (VectorType *PT = dyn_cast<VectorType>(I.getType())) markConstant(IV, &I, Constant::getAllOnesValue(PT)); else markConstant(IV, &I, @@ -959,9 +999,9 @@ if (Result.isConstant()) { markConstant(IV, &I, Result.getConstant()); // Remember that this instruction is virtually using the PHI node - // operands. - UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I)); - UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I)); + // operands. 
+ InsertInOverdefinedPHIs(&I, PN1); + InsertInOverdefinedPHIs(&I, PN2); return; } @@ -1042,8 +1082,8 @@ markConstant(&I, Result.getConstant()); // Remember that this instruction is virtually using the PHI node // operands. - UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I)); - UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I)); + InsertInOverdefinedPHIs(&I, PN1); + InsertInOverdefinedPHIs(&I, PN2); return; } @@ -1152,8 +1192,8 @@ } Constant *Ptr = Operands[0]; - markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1, - Operands.size()-1)); + ArrayRef<Constant *> Indices(Operands.begin() + 1, Operands.end()); + markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices)); } void SCCPSolver::visitStoreInst(StoreInst &SI) { @@ -1251,7 +1291,7 @@ // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size())) + if (Constant *C = ConstantFoldCall(F, Operands)) return markConstant(I, C); } @@ -1276,7 +1316,7 @@ continue; } - if (const StructType *STy = dyn_cast<StructType>(AI->getType())) { + if (StructType *STy = dyn_cast<StructType>(AI->getType())) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { LatticeVal CallArg = getStructValueState(*CAI, i); mergeInValue(getStructValueState(AI, i), AI, CallArg); @@ -1288,7 +1328,7 @@ } // If this is a single/zero retval case, see if we're tracking the function. - if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) { + if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) { if (!MRVFunctionsTracked.count(F)) goto CallOverdefined; // Not tracking this callee. @@ -1392,67 +1432,116 @@ // Look for instructions which produce undef values. if (I->getType()->isVoidTy()) continue; - if (const StructType *STy = dyn_cast<StructType>(I->getType())) { - // Only a few things that can be structs matter for undef. Just send - // all their results to overdefined. 
We could be more precise than this - // but it isn't worth bothering. - if (isa<CallInst>(I) || isa<SelectInst>(I)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - LatticeVal &LV = getStructValueState(I, i); - if (LV.isUndefined()) - markOverdefined(LV, I); - } + if (StructType *STy = dyn_cast<StructType>(I->getType())) { + // Only a few things that can be structs matter for undef. + + // Tracked calls must never be marked overdefined in ResolvedUndefsIn. + if (CallSite CS = CallSite(I)) + if (Function *F = CS.getCalledFunction()) + if (MRVFunctionsTracked.count(F)) + continue; + + // extractvalue and insertvalue don't need to be marked; they are + // tracked as precisely as their operands. + if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I)) + continue; + + // Send the results of everything else to overdefined. We could be + // more precise than this but it isn't worth bothering. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + LatticeVal &LV = getStructValueState(I, i); + if (LV.isUndefined()) + markOverdefined(LV, I); } continue; } - + LatticeVal &LV = getValueState(I); if (!LV.isUndefined()) continue; - // No instructions using structs need disambiguation. - if (I->getOperand(0)->getType()->isStructTy()) + // extractvalue is safe; check here because the argument is a struct. + if (isa<ExtractValueInst>(I)) continue; - // Get the lattice values of the first two operands for use below. + // Compute the operand LatticeVals, for convenience below. + // Anything taking a struct is conservatively assumed to require + // overdefined markings. + if (I->getOperand(0)->getType()->isStructTy()) { + markOverdefined(I); + return true; + } LatticeVal Op0LV = getValueState(I->getOperand(0)); LatticeVal Op1LV; if (I->getNumOperands() == 2) { - // No instructions using structs need disambiguation. 
- if (I->getOperand(1)->getType()->isStructTy()) - continue; - - // If this is a two-operand instruction, and if both operands are - // undefs, the result stays undef. + if (I->getOperand(1)->getType()->isStructTy()) { + markOverdefined(I); + return true; + } + Op1LV = getValueState(I->getOperand(1)); - if (Op0LV.isUndefined() && Op1LV.isUndefined()) - continue; } - // If this is an instructions whose result is defined even if the input is // not fully defined, propagate the information. - const Type *ITy = I->getType(); + Type *ITy = I->getType(); switch (I->getOpcode()) { - default: break; // Leave the instruction as an undef. + case Instruction::Add: + case Instruction::Sub: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + break; // Any undef -> undef + case Instruction::FSub: + case Instruction::FAdd: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + // Floating-point binary operation: be conservative. + if (Op0LV.isUndefined() && Op1LV.isUndefined()) + markForcedConstant(I, Constant::getNullValue(ITy)); + else + markOverdefined(I); + return true; case Instruction::ZExt: - // After a zero extend, we know the top part is zero. SExt doesn't have - // to be handled here, because we don't know whether the top part is 1's - // or 0's. - case Instruction::SIToFP: // some FP values are not possible, just use 0. - case Instruction::UIToFP: // some FP values are not possible, just use 0. + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + // undef -> 0; some outputs are impossible markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Mul: case Instruction::And: + // Both operands undef -> undef + if (Op0LV.isUndefined() && Op1LV.isUndefined()) + break; // undef * X -> 0. X could be zero. 
// undef & X -> 0. X could be zero. markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Or: + // Both operands undef -> undef + if (Op0LV.isUndefined() && Op1LV.isUndefined()) + break; // undef | X -> -1. X could be -1. markForcedConstant(I, Constant::getAllOnesValue(ITy)); return true; + case Instruction::Xor: + // undef ^ undef -> 0; strictly speaking, this is not strictly + // necessary, but we try to be nice to people who expect this + // behavior in simple cases + if (Op0LV.isUndefined() && Op1LV.isUndefined()) { + markForcedConstant(I, Constant::getNullValue(ITy)); + return true; + } + // undef ^ X -> undef + break; + case Instruction::SDiv: case Instruction::UDiv: case Instruction::SRem: @@ -1467,26 +1556,24 @@ return true; case Instruction::AShr: - // undef >>s X -> undef. No change. - if (Op0LV.isUndefined()) break; - - // X >>s undef -> X. X could be 0, X could have the high-bit known set. - if (Op0LV.isConstant()) - markForcedConstant(I, Op0LV.getConstant()); - else - markOverdefined(I); + // X >>a undef -> undef. + if (Op1LV.isUndefined()) break; + + // undef >>a X -> all ones + markForcedConstant(I, Constant::getAllOnesValue(ITy)); return true; case Instruction::LShr: case Instruction::Shl: - // undef >> X -> undef. No change. - // undef << X -> undef. No change. - if (Op0LV.isUndefined()) break; - - // X >> undef -> 0. X could be 0. - // X << undef -> 0. X could be 0. + // X << undef -> undef. + // X >> undef -> undef. + if (Op1LV.isUndefined()) break; + + // undef << X -> 0 + // undef >> X -> 0 markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Select: + Op1LV = getValueState(I->getOperand(1)); // undef ? X : Y -> X or Y. There could be commonality between X/Y. if (Op0LV.isUndefined()) { if (!Op1LV.isConstant()) // Pick the constant one if there is any. 
@@ -1506,9 +1593,35 @@ else markOverdefined(I); return true; + case Instruction::Load: + // A load here means one of two things: a load of undef from a global, + // a load from an unknown pointer. Either way, having it return undef + // is okay. + break; + case Instruction::ICmp: + // X == undef -> undef. Other comparisons get more complicated. + if (cast<ICmpInst>(I)->isEquality()) + break; + markOverdefined(I); + return true; case Instruction::Call: - // If a call has an undef result, it is because it is constant foldable - // but one of the inputs was undef. Just force the result to + case Instruction::Invoke: { + // There are two reasons a call can have an undef result + // 1. It could be tracked. + // 2. It could be constant-foldable. + // Because of the way we solve return values, tracked calls must + // never be marked overdefined in ResolvedUndefsIn. + if (Function *F = CallSite(I).getCalledFunction()) + if (TrackedRetVals.count(F)) + break; + + // If the call is constant-foldable, we mark it overdefined because + // we do not know what return values are valid. + markOverdefined(I); + return true; + } + default: + // If we don't know what should happen here, conservatively mark it // overdefined. markOverdefined(I); return true; @@ -1571,22 +1684,20 @@ /// struct SCCP : public FunctionPass { static char ID; // Pass identification, replacement for typeid - SCCP() : FunctionPass(ID) {} + SCCP() : FunctionPass(ID) { + initializeSCCPPass(*PassRegistry::getPassRegistry()); + } // runOnFunction - Run the Sparse Conditional Constant Propagation // algorithm, and return true if the function was modified. 
// bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - } }; } // end anonymous namespace char SCCP::ID = 0; INITIALIZE_PASS(SCCP, "sccp", - "Sparse Conditional Constant Propagation", false, false); + "Sparse Conditional Constant Propagation", false, false) // createSCCPPass - This is the public interface to this file. FunctionPass *llvm::createSCCPPass() { @@ -1596,15 +1707,25 @@ static void DeleteInstructionInBlock(BasicBlock *BB) { DEBUG(dbgs() << " BasicBlock Dead:" << *BB); ++NumDeadBlocks; - - // Delete the instructions backwards, as it has a reduced likelihood of - // having to update as many def-use and use-def chains. - while (!isa<TerminatorInst>(BB->begin())) { - Instruction *I = --BasicBlock::iterator(BB->getTerminator()); - - if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); - BB->getInstList().erase(I); + + // Check to see if there are non-terminating instructions to delete. + if (isa<TerminatorInst>(BB->begin())) + return; + + // Delete the instructions backwards, as it has a reduced likelihood of having + // to update as many def-use and use-def chains. + Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != BB->begin()) { + // Delete the next to last instruction. 
+ BasicBlock::iterator I = EndInst; + Instruction *Inst = --I; + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (isa<LandingPadInst>(Inst)) { + EndInst = Inst; + continue; + } + BB->getInstList().erase(Inst); ++NumInstRemoved; } } @@ -1687,7 +1808,9 @@ /// struct IPSCCP : public ModulePass { static char ID; - IPSCCP() : ModulePass(ID) {} + IPSCCP() : ModulePass(ID) { + initializeIPSCCPPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); }; } // end anonymous namespace @@ -1695,7 +1818,7 @@ char IPSCCP::ID = 0; INITIALIZE_PASS(IPSCCP, "ipsccp", "Interprocedural Sparse Conditional Constant Propagation", - false, false); + false, false) // createIPSCCPPass - This is the public interface to this file. ModulePass *llvm::createIPSCCPPass() { @@ -1713,7 +1836,7 @@ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == GV || SI->isVolatile()) return true; // Storing addr of GV. - } else if (isa<CallInst>(U)) { + } else if (isa<InvokeInst>(U) || isa<CallInst>(U)) { // Make sure we are calling the function, not passing the address. ImmutableCallSite CS(cast<Instruction>(U)); if (!CS.isCallee(UI)) @@ -1734,6 +1857,13 @@ bool IPSCCP::runOnModule(Module &M) { SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); + // AddressTakenFunctions - This set keeps track of the address-taken functions + // that are in the input. As IPSCCP runs through and simplifies code, + // functions that were address taken can end up losing their + // address-taken-ness. Because of this, we keep track of their addresses from + // the first pass so we can use them for the later simplification pass. + SmallPtrSet<Function*, 32> AddressTakenFunctions; + // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. 
// @@ -1749,9 +1879,13 @@ // If this function only has direct calls that we can see, we can track its // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. - if (F->hasLocalLinkage() && !AddressIsTaken(F)) { - Solver.AddArgumentTrackedFunction(F); - continue; + if (F->hasLocalLinkage()) { + if (AddressIsTaken(F)) + AddressTakenFunctions.insert(F); + else { + Solver.AddArgumentTrackedFunction(F); + continue; + } } // Assume the function is called. @@ -1936,7 +2070,7 @@ continue; // We can only do this if we know that nothing else can call the function. - if (!F->hasLocalLinkage() || AddressIsTaken(F)) + if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F)) continue; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) @@ -1951,7 +2085,7 @@ ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); } - // If we infered constant or undef values for globals variables, we can delete + // If we inferred constant or undef values for globals variables, we can delete // the global and any stores that remain to it. const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals(); for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
diff --git a/src/LLVM/lib/Transforms/Scalar/Scalar.cpp b/src/LLVM/lib/Transforms/Scalar/Scalar.cpp new file mode 100644 index 0000000..f6918de --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/Scalar.cpp
@@ -0,0 +1,192 @@ +//===-- Scalar.cpp --------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements common infrastructure for libLLVMScalarOpts.a, which +// implements several scalar transformations over the LLVM intermediate +// representation, including the C bindings for that library. +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Transforms/Scalar.h" +#include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +/// initializeScalarOptsPasses - Initialize all passes linked into the +/// ScalarOpts library. 
+void llvm::initializeScalarOpts(PassRegistry &Registry) { + initializeADCEPass(Registry); + initializeBlockPlacementPass(Registry); + initializeCodeGenPreparePass(Registry); + initializeConstantPropagationPass(Registry); + initializeCorrelatedValuePropagationPass(Registry); + initializeDCEPass(Registry); + initializeDeadInstEliminationPass(Registry); + initializeDSEPass(Registry); + initializeGVNPass(Registry); + initializeEarlyCSEPass(Registry); + initializeIndVarSimplifyPass(Registry); + initializeJumpThreadingPass(Registry); + initializeLICMPass(Registry); + initializeLoopDeletionPass(Registry); + initializeLoopInstSimplifyPass(Registry); + initializeLoopRotatePass(Registry); + initializeLoopStrengthReducePass(Registry); + initializeLoopUnrollPass(Registry); + initializeLoopUnswitchPass(Registry); + initializeLoopIdiomRecognizePass(Registry); + initializeLowerAtomicPass(Registry); + initializeLowerExpectIntrinsicPass(Registry); + initializeMemCpyOptPass(Registry); + initializeObjCARCAliasAnalysisPass(Registry); + initializeObjCARCExpandPass(Registry); + initializeObjCARCContractPass(Registry); + initializeObjCARCOptPass(Registry); + initializeReassociatePass(Registry); + initializeRegToMemPass(Registry); + initializeSCCPPass(Registry); + initializeIPSCCPPass(Registry); + initializeSROA_DTPass(Registry); + initializeSROA_SSAUpPass(Registry); + initializeCFGSimplifyPassPass(Registry); + initializeSimplifyLibCallsPass(Registry); + initializeSinkingPass(Registry); + initializeTailCallElimPass(Registry); +} + +void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { + initializeScalarOpts(*unwrap(R)); +} + +void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createAggressiveDCEPass()); +} + +void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createCFGSimplificationPass()); +} + +void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDeadStoreEliminationPass()); +} + +void 
LLVMAddGVNPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGVNPass()); +} + +void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIndVarSimplifyPass()); +} + +void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createInstructionCombiningPass()); +} + +void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createJumpThreadingPass()); +} + +void LLVMAddLICMPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLICMPass()); +} + +void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopDeletionPass()); +} + +void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopIdiomPass()); +} + +void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopRotatePass()); +} + +void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopUnrollPass()); +} + +void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopUnswitchPass()); +} + +void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createMemCpyOptPass()); +} + +void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createPromoteMemoryToRegisterPass()); +} + +void LLVMAddReassociatePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createReassociatePass()); +} + +void LLVMAddSCCPPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createSCCPPass()); +} + +void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createScalarReplAggregatesPass()); +} + +void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) { + unwrap(PM)->add(createScalarReplAggregatesPass(-1, false)); +} + +void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM, + int Threshold) { + unwrap(PM)->add(createScalarReplAggregatesPass(Threshold)); +} + +void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createSimplifyLibCallsPass()); +} + +void 
LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTailCallEliminationPass()); +} + +void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createConstantPropagationPass()); +} + +void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDemoteRegisterToMemoryPass()); +} + +void LLVMAddVerifierPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createVerifierPass()); +} + +void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createCorrelatedValuePropagationPass()); +} + +void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createEarlyCSEPass()); +} + +void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTypeBasedAliasAnalysisPass()); +} + +void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createBasicAliasAnalysisPass()); +} + +void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLowerExpectIntrinsicPass()); +}
diff --git a/src/LLVM/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/src/LLVM/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 80d1b82..c6d9123 100644 --- a/src/LLVM/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/src/LLVM/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -28,30 +28,39 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); +STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); STATISTIC(NumConverted, "Number of aggregates converted to scalar"); STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - explicit SROA(signed T = -1) : FunctionPass(ID) { + SROA(int T, bool hasDT, char &ID) + : FunctionPass(ID), HasDomTree(hasDT) { if (T == -1) SRThreshold = 128; else @@ -63,17 +72,10 @@ bool performScalarRepl(Function &F); bool performPromotion(Function &F); - // getAnalysisUsage - This pass does not require any passes, but we know it - // will not alter the CFG, so say so. 
- virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); - AU.addRequired<DominanceFrontier>(); - AU.setPreservesCFG(); - } - private: + bool HasDomTree; TargetData *TD; - + /// DeadInsts - Keep track of instructions we have made dead, so that /// we can remove them after we are done working. SmallVector<Value*, 32> DeadInsts; @@ -82,46 +84,70 @@ /// information about the uses. All these fields are initialized to false /// and set to true when something is learned. struct AllocaInfo { + /// The alloca to promote. + AllocaInst *AI; + + /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite + /// looping and avoid redundant work. + SmallPtrSet<PHINode*, 8> CheckedPHIs; + /// isUnsafe - This is set to true if the alloca cannot be SROA'd. bool isUnsafe : 1; - + /// isMemCpySrc - This is true if this aggregate is memcpy'd from. bool isMemCpySrc : 1; /// isMemCpyDst - This is true if this aggregate is memcpy'd into. bool isMemCpyDst : 1; - AllocaInfo() - : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {} + /// hasSubelementAccess - This is true if a subelement of the alloca is + /// ever accessed, or false if the alloca is only accessed with mem + /// intrinsics or load/store that only access the entire alloca at once. + bool hasSubelementAccess : 1; + + /// hasALoadOrStore - This is true if there are any loads or stores to it. + /// The alloca may just be accessed with memcpy, for example, which would + /// not set this. 
+ bool hasALoadOrStore : 1; + + explicit AllocaInfo(AllocaInst *ai) + : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false), + hasSubelementAccess(false), hasALoadOrStore(false) {} }; - + unsigned SRThreshold; - void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; } + void MarkUnsafe(AllocaInfo &I, Instruction *User) { + I.isUnsafe = true; + DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n'); + } bool isSafeAllocaToScalarRepl(AllocaInst *AI); - void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - AllocaInfo &Info); - void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, - AllocaInfo &Info); - void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, - const Type *MemOpType, bool isStore, AllocaInfo &Info); - bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); - uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset, - const Type *&IdxTy); - - void DoScalarReplacement(AllocaInst *AI, + void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info); + void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset, + AllocaInfo &Info); + void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info); + void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, + Type *MemOpType, bool isStore, AllocaInfo &Info, + Instruction *TheAccess, bool AllowWholeAccess); + bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size); + uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, + Type *&IdxTy); + + void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); void DeleteDeadInstructions(); - AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); void 
RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); + void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, + uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts); void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); @@ -129,18 +155,64 @@ SmallVector<AllocaInst*, 32> &NewElts); void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); - - static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); + + static MemTransferInst *isOnlyCopiedFromConstantGlobal( + AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete); }; + + // SROA_DT - SROA that uses DominatorTree. + struct SROA_DT : public SROA { + static char ID; + public: + SROA_DT(int T = -1) : SROA(T, true, ID) { + initializeSROA_DTPass(*PassRegistry::getPassRegistry()); + } + + // getAnalysisUsage - This pass does not require any passes, but we know it + // will not alter the CFG, so say so. + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); + } + }; + + // SROA_SSAUp - SROA that uses SSAUpdater. + struct SROA_SSAUp : public SROA { + static char ID; + public: + SROA_SSAUp(int T = -1) : SROA(T, false, ID) { + initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry()); + } + + // getAnalysisUsage - This pass does not require any passes, but we know it + // will not alter the CFG, so say so. 
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + }; + } -char SROA::ID = 0; -INITIALIZE_PASS(SROA, "scalarrepl", - "Scalar Replacement of Aggregates", false, false); +char SROA_DT::ID = 0; +char SROA_SSAUp::ID = 0; + +INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl", + "Scalar Replacement of Aggregates (DT)", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(SROA_DT, "scalarrepl", + "Scalar Replacement of Aggregates (DT)", false, false) + +INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa", + "Scalar Replacement of Aggregates (SSAUp)", false, false) +INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa", + "Scalar Replacement of Aggregates (SSAUp)", false, false) // Public interface to the ScalarReplAggregates pass -FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { - return new SROA(Threshold); +FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold, + bool UseDomTree) { + if (UseDomTree) + return new SROA_DT(Threshold); + return new SROA_SSAUp(Threshold); } @@ -153,47 +225,65 @@ /// optimization, which scans the uses of an alloca and determines if it can /// rewrite it in terms of a single new alloca that can be mem2reg'd. class ConvertToScalarInfo { - /// AllocaSize - The size of the alloca being considered. + /// AllocaSize - The size of the alloca being considered in bytes. unsigned AllocaSize; const TargetData &TD; - + /// IsNotTrivial - This is set to true if there is some access to the object /// which means that mem2reg can't promote it. bool IsNotTrivial; - + + /// ScalarKind - Tracks the kind of alloca being considered for promotion, + /// computed based on the uses of the alloca rather than the LLVM type system. + enum { + Unknown, + + // Accesses via GEPs that are consistent with element access of a vector + // type. This will not be converted into a vector unless there is a later + // access using an actual vector type. 
+ ImplicitVector, + + // Accesses via vector operations and GEPs that are consistent with the + // layout of a vector type. + Vector, + + // An integer bag-of-bits with bitwise operations for insertion and + // extraction. Any combination of types can be converted into this kind + // of scalar. + Integer + } ScalarKind; + /// VectorTy - This tracks the type that we should promote the vector to if /// it is possible to turn it into a vector. This starts out null, and if it /// isn't possible to turn into a vector type, it gets set to VoidTy. - const Type *VectorTy; - - /// HadAVector - True if there is at least one vector access to the alloca. - /// We don't want to turn random arrays into vectors and use vector element - /// insert/extract, but if there are element accesses to something that is - /// also declared as a vector, we do want to promote to a vector. - bool HadAVector; + VectorType *VectorTy; + + /// HadNonMemTransferAccess - True if there is at least one access to the + /// alloca that is not a MemTransferInst. We don't want to turn structs into + /// large integers unless there is some potential for optimization. 
+ bool HadNonMemTransferAccess; public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td) { - IsNotTrivial = false; - VectorTy = 0; - HadAVector = false; - } - + : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown), + VectorTy(0), HadNonMemTransferAccess(false) { } + AllocaInst *TryConvert(AllocaInst *AI); - + private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInType(const Type *In, uint64_t Offset); + void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset); + bool MergeInVectorType(VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); - - Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, + + Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType, uint64_t Offset, IRBuilder<> &Builder); Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, uint64_t Offset, IRBuilder<> &Builder); }; } // end anonymous namespace. + /// TryConvert - Analyze the specified alloca, and if it is safe to do so, /// rewrite it to be a new alloca which is mem2reg'able. This returns the new /// alloca if possible or null if not. @@ -202,30 +292,45 @@ // out. if (!CanConvertToScalar(AI, 0) || !IsNotTrivial) return 0; - + + // If an alloca has only memset / memcpy uses, it may still have an Unknown + // ScalarKind. Treat it as an Integer below. + if (ScalarKind == Unknown) + ScalarKind = Integer; + + if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8) + ScalarKind = Integer; + // If we were able to find a vector type that can handle this with // insert/extract elements, and if there was at least one use that had // a vector type, promote this to a vector. We don't want to promote // random stuff that doesn't use vectors (e.g. <9 x double>) because then // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. 
- const Type *NewTy; - if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + Type *NewTy; + if (ScalarKind == Vector) { + assert(VectorTy && "Missing type for vector scalar."); DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. } else { + unsigned BitWidth = AllocaSize * 8; + if ((ScalarKind == ImplicitVector || ScalarKind == Integer) && + !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth)) + return 0; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + NewTy = IntegerType::get(AI->getContext(), BitWidth); } - AllocaInst *NewAI = new AllocaInst(NewTy, 0, AI->getParent()->begin()); + AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); return NewAI; } -/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) -/// so far at the offset specified by Offset (which is specified in bytes). +/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type +/// (VectorTy) so far at the offset specified by Offset (which is specified in +/// bytes). /// /// There are two cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. @@ -236,50 +341,65 @@ /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { +void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In, + uint64_t Offset) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. - if (VectorTy && VectorTy->isVoidTy()) + if (ScalarKind == Integer) return; - + // If this could be contributing to a vector, analyze it. 
// If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. - if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { - // Remember if we saw a vector type. - HadAVector = true; - - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. If this is a subsequent access, ignore it - // even if it is a differing type but the same size. Worst case we can - // bitcast the resultant vectors. - if (VectorTy == 0) - VectorTy = VInTy; + if (VectorType *VInTy = dyn_cast<VectorType>(In)) { + if (MergeInVectorType(VInTy, Offset)) return; - } } else if (In->isFloatTy() || In->isDoubleTy() || (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // Full width accesses can be ignored, because they can always be turned + // into bitcasts. + unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (EltSize == AllocaSize) + return; + // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is // compatible with it. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (VectorTy == 0 || - cast<VectorType>(VectorTy)->getElementType() - ->getPrimitiveSizeInBits()/8 == EltSize)) { - if (VectorTy == 0) + (!VectorTy || EltSize == VectorTy->getElementType() + ->getPrimitiveSizeInBits()/8)) { + if (!VectorTy) { + ScalarKind = ImplicitVector; VectorTy = VectorType::get(In, AllocaSize/EltSize); + } return; } } - + // Otherwise, we have a case that we can't handle with an optimized vector // form. We can still turn this into a large integer. 
- VectorTy = Type::getVoidTy(In->getContext()); + ScalarKind = Integer; +} + +/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore, +/// returning true if the type was successfully merged and false otherwise. +bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy, + uint64_t Offset) { + if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { + // If we're storing/loading a vector of the right size, allow it as a + // vector. If this the first vector we see, remember the type so that + // we know the element size. If this is a subsequent access, ignore it + // even if it is a differing type but the same size. Worst case we can + // bitcast the resultant vectors. + if (!VectorTy) + VectorTy = VInTy; + ScalarKind = Vector; + return true; + } + + return false; } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all @@ -294,24 +414,33 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { Instruction *User = cast<Instruction>(*UI); - + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // Don't break volatile loads. - if (LI->isVolatile()) + if (!LI->isSimple()) return false; - MergeInType(LI->getType(), Offset); + // Don't touch MMX operations. + if (LI->getType()->isX86_MMXTy()) + return false; + HadNonMemTransferAccess = true; + MergeInTypeForLoadOrStore(LI->getType(), Offset); continue; } - + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Storing the pointer, not into the value? - if (SI->getOperand(0) == V || SI->isVolatile()) return false; - MergeInType(SI->getOperand(0)->getType(), Offset); + if (SI->getOperand(0) == V || !SI->isSimple()) return false; + // Don't touch MMX operations. 
+ if (SI->getOperand(0)->getType()->isX86_MMXTy()) + return false; + HadNonMemTransferAccess = true; + MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset); continue; } - + if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { - IsNotTrivial = true; // Can't be mem2reg'd. + if (!onlyUsedByLifetimeMarkers(BCI)) + IsNotTrivial = true; // Can't be mem2reg'd. if (!CanConvertToScalar(BCI, Offset)) return false; continue; @@ -321,26 +450,40 @@ // If this is a GEP with a variable indices, we can't handle it. if (!GEP->hasAllConstantIndices()) return false; - + // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); // See if all uses can be converted. if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } // If this is a constant sized memset of a constant value (e.g. 0) we can // handle it. if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { - // Store of constant value and constant size. - if (!isa<ConstantInt>(MSI->getValue()) || - !isa<ConstantInt>(MSI->getLength())) + // Store of constant value. + if (!isa<ConstantInt>(MSI->getValue())) return false; + + // Store of constant size. + ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength()); + if (!Len) + return false; + + // If the size differs from the alloca, we can only convert the alloca to + // an integer bag-of-bits. + // FIXME: This should handle all of the cases that are currently accepted + // as vector element insertions. + if (Len->getZExtValue() != AllocaSize || Offset != 0) + ScalarKind = Integer; + IsNotTrivial = true; // Can't be mem2reg'd. 
+ HadNonMemTransferAccess = true; continue; } @@ -350,15 +493,23 @@ ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()); if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0) return false; - + IsNotTrivial = true; // Can't be mem2reg'd. continue; } - + + // If this is a lifetime intrinsic, we can handle it. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + continue; + } + } + // Otherwise, we cannot handle this! return false; } - + return true; } @@ -384,14 +535,14 @@ // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); continue; } - - IRBuilder<> Builder(User->getParent(), User); - + + IRBuilder<> Builder(User); + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // The load is a bit extract from NewAI shifted right by Offset bits. Value *LoadedVal = Builder.CreateLoad(NewAI); @@ -401,22 +552,22 @@ LI->eraseFromParent(); continue; } - + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { assert(SI->getOperand(0) != Ptr && "Consistency error!"); - Instruction *Old = Builder.CreateLoad(NewAI); + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, Builder); Builder.CreateStore(New, NewAI); SI->eraseFromParent(); - + // If the load we just inserted is now dead, then the inserted store // overwrote the entire thing. if (Old->use_empty()) Old->eraseFromParent(); continue; } - + // If this is a constant sized memset of a constant value (e.g. 0) we can // transform it into a store of the expanded constant value. 
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { @@ -424,7 +575,7 @@ unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); if (NumBytes != 0) { unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); - + // Compute the value replicated the right number of times. APInt APVal(NumBytes*8, Val); @@ -432,17 +583,17 @@ if (Val) for (unsigned i = 1; i != NumBytes; ++i) APVal |= APVal << 8; - - Instruction *Old = Builder.CreateLoad(NewAI); + + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), Old, Offset, Builder); Builder.CreateStore(New, NewAI); - + // If the load we just inserted is now dead, then the memset overwrote // the entire thing. if (Old->use_empty()) - Old->eraseFromParent(); + Old->eraseFromParent(); } MSI->eraseFromParent(); continue; @@ -452,29 +603,42 @@ // can handle it like a load or store of the scalar type. if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { assert(Offset == 0 && "must be store to start of alloca"); - + // If the source and destination are both to the same alloca, then this is // a noop copy-to-self, just delete it. Otherwise, emit a load and store // as appropriate. - AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0)); - - if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { + AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0)); + + if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) { // Dest must be OrigAI, change this to be a load from the original // pointer (bitcasted), then a store to our new alloca. 
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); Value *SrcPtr = MTI->getSource(); - SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType()); - + PointerType* SPTy = cast<PointerType>(SrcPtr->getType()); + PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) { + AIPTy = PointerType::get(AIPTy->getElementType(), + SPTy->getAddressSpace()); + } + SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy); + LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); SrcVal->setAlignment(MTI->getAlignment()); Builder.CreateStore(SrcVal, NewAI); - } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { + } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) { // Src must be OrigAI, change this to be a load from NewAI then a store // through the original dest pointer (bitcasted). assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); - Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType()); + PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType()); + PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) { + AIPTy = PointerType::get(AIPTy->getElementType(), + DPTy->getAddressSpace()); + } + Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy); + StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); NewStore->setAlignment(MTI->getAlignment()); } else { @@ -484,7 +648,17 @@ MTI->eraseFromParent(); continue; } - + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + // There's no need to preserve these, as the resulting alloca will be + // converted to a register anyways. 
+ II->eraseFromParent(); + continue; + } + } + llvm_unreachable("Unsupported operation!"); } } @@ -500,17 +674,20 @@ /// Offset is an offset from the original alloca, in bits that need to be /// shifted to the right. Value *ConvertToScalarInfo:: -ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, +ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - if (FromVal->getType() == ToType && Offset == 0) + Type *FromType = FromVal->getType(); + if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (ToType->isVectorTy()) - return Builder.CreateBitCast(FromVal, ToType); + if (VectorType *VTy = dyn_cast<VectorType>(FromType)) { + unsigned FromTypeSize = TD.getTypeAllocSize(FromType); + unsigned ToTypeSize = TD.getTypeAllocSize(ToType); + if (FromTypeSize == ToTypeSize) + return Builder.CreateBitCast(FromVal, ToType); // Otherwise it must be an element access. unsigned Elt = 0; @@ -520,16 +697,15 @@ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. - Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( - Type::getInt32Ty(FromVal->getContext()), Elt)); + Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt)); if (V->getType() != ToType) V = Builder.CreateBitCast(V, ToType); return V; } - + // If ToType is a first class aggregate, extract out each of the pieces and // use insertvalue's to form the FCA. 
- if (const StructType *ST = dyn_cast<StructType>(ToType)) { + if (StructType *ST = dyn_cast<StructType>(ToType)) { const StructLayout &Layout = *TD.getStructLayout(ST); Value *Res = UndefValue::get(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { @@ -540,8 +716,8 @@ } return Res; } - - if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { + + if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { @@ -553,7 +729,7 @@ } // Otherwise, this must be a union that was converted to an integer value. - const IntegerType *NTy = cast<IntegerType>(FromVal->getType()); + IntegerType *NTy = cast<IntegerType>(FromVal->getType()); // If this is a big-endian system and the load is narrower than the // full alloca type, we need to do a shift to get the right bits. @@ -573,22 +749,20 @@ // only some bits are used. if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateLShr(FromVal, - ConstantInt::get(FromVal->getType(), - ShAmt)); + ConstantInt::get(FromVal->getType(), ShAmt)); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateShl(FromVal, - ConstantInt::get(FromVal->getType(), - -ShAmt)); + FromVal = Builder.CreateShl(FromVal, + ConstantInt::get(FromVal->getType(), -ShAmt)); // Finally, unconditionally truncate the integer to the right width. 
unsigned LIBitWidth = TD.getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) FromVal = - Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), + Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), LIBitWidth)); else if (LIBitWidth > NTy->getBitWidth()) FromVal = - Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), + Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), LIBitWidth)); // If the result is an integer, this is a trunc or bitcast. @@ -619,44 +793,38 @@ uint64_t Offset, IRBuilder<> &Builder) { // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. - const Type *AllocaType = Old->getType(); + Type *AllocaType = Old->getType(); LLVMContext &Context = Old->getContext(); - if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { + if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy); uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType()); - + // Changing the whole vector with memset or with an access of a different // vector type? if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType); - - uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + return Builder.CreateBitCast(SV, AllocaType); // Must be an element insertion. + assert(SV->getType() == VTy->getElementType()); + uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); unsigned Elt = Offset/EltSize; - - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType()); - - SV = Builder.CreateInsertElement(Old, SV, - ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt)); - return SV; + return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt)); } - + // If SV is a first-class aggregate value, insert each value recursively. 
- if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { + if (StructType *ST = dyn_cast<StructType>(SV->getType())) { const StructLayout &Layout = *TD.getStructLayout(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i); - Old = ConvertScalar_InsertValue(Elt, Old, + Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), Builder); } return Old; } - - if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { + + if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i); @@ -672,8 +840,7 @@ unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType); if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) - SV = Builder.CreateBitCast(SV, - IntegerType::get(SV->getContext(),SrcWidth)); + SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth)); else if (SV->getType()->isPointerTy()) SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext())); @@ -708,12 +875,10 @@ // only some bits in the structure are set. APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), - ShAmt)); + SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt)); Mask <<= ShAmt; } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), - -ShAmt)); + SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt)); Mask = Mask.lshr(-ShAmt); } @@ -721,8 +886,8 @@ // in the new bits. 
if (SrcWidth != DestWidth) { assert(DestWidth > SrcWidth); - Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask)); - SV = Builder.CreateOr(Old, SV); + Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); + SV = Builder.CreateOr(Old, SV, "ins"); } return SV; } @@ -755,16 +920,382 @@ return Changed; } +namespace { +class AllocaPromoter : public LoadAndStorePromoter { + AllocaInst *AI; + DIBuilder *DIB; + SmallVector<DbgDeclareInst *, 4> DDIs; + SmallVector<DbgValueInst *, 4> DVIs; +public: + AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, + DIBuilder *DB) + : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {} + + void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { + // Remember which alloca we're promoting (for isInstInList). + this->AI = AI; + if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI)) + for (Value::use_iterator UI = DebugNode->use_begin(), + E = DebugNode->use_end(); UI != E; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + DDIs.push_back(DDI); + else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI)) + DVIs.push_back(DVI); + + LoadAndStorePromoter::run(Insts); + AI->eraseFromParent(); + for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(), + E = DDIs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + DDI->eraseFromParent(); + } + for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(), + E = DVIs.end(); I != E; ++I) { + DbgValueInst *DVI = *I; + DVI->eraseFromParent(); + } + } + + virtual bool isInstInList(Instruction *I, + const SmallVectorImpl<Instruction*> &Insts) const { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getOperand(0) == AI; + return cast<StoreInst>(I)->getPointerOperand() == AI; + } + + virtual void updateDebugInfo(Instruction *Inst) const { + for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(), + E = DDIs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + if (StoreInst *SI = 
dyn_cast<StoreInst>(Inst)) + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + ConvertDebugDeclareToDebugValue(DDI, LI, *DIB); + } + for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), + E = DVIs.end(); I != E; ++I) { + DbgValueInst *DVI = *I; + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + Instruction *DbgVal = NULL; + // If an argument is zero extended then use argument directly. The ZExt + // may be zapped by an optimization pass in future. + Argument *ExtendedArg = NULL; + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); + if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); + if (ExtendedArg) + DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0, + DIVariable(DVI->getVariable()), + SI); + else + DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0, + DIVariable(DVI->getVariable()), + SI); + DbgVal->setDebugLoc(DVI->getDebugLoc()); + } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + Instruction *DbgVal = + DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0, + DIVariable(DVI->getVariable()), LI); + DbgVal->setDebugLoc(DVI->getDebugLoc()); + } + } + } +}; +} // end anon namespace + +/// isSafeSelectToSpeculate - Select instructions that use an alloca and are +/// subsequently loaded can be rewritten to load both input pointers and then +/// select between the result, allowing the load of the alloca to be promoted. +/// From this: +/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other +/// %V = load i32* %P2 +/// to: +/// %V1 = load i32* %Alloca -> will be mem2reg'd +/// %V2 = load i32* %Other +/// %V = select i1 %cond, i32 %V1, i32 %V2 +/// +/// We can do this to a select if its only uses are loads and if the operand to +/// the select can be loaded unconditionally. 
+static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { + bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(); + bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(); + + for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast<LoadInst>(*UI); + if (LI == 0 || !LI->isSimple()) return false; + + // Both operands to the select need to be dereferencable, either absolutely + // (e.g. allocas) or at this point because we can see other accesses to it. + if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI, + LI->getAlignment(), TD)) + return false; + if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI, + LI->getAlignment(), TD)) + return false; + } + + return true; +} + +/// isSafePHIToSpeculate - PHI instructions that use an alloca and are +/// subsequently loaded can be rewritten to load both input pointers in the pred +/// blocks and then PHI the results, allowing the load of the alloca to be +/// promoted. +/// From this: +/// %P2 = phi [i32* %Alloca, i32* %Other] +/// %V = load i32* %P2 +/// to: +/// %V1 = load i32* %Alloca -> will be mem2reg'd +/// ... +/// %V2 = load i32* %Other +/// ... +/// %V = phi [i32 %V1, i32 %V2] +/// +/// We can do this to a select if its only uses are loads and if the operand to +/// the select can be loaded unconditionally. +static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { + // For now, we can only do this promotion if the load is in the same block as + // the PHI, and if there are no stores between the phi and load. + // TODO: Allow recursive phi users. + // TODO: Allow stores. + BasicBlock *BB = PN->getParent(); + unsigned MaxAlign = 0; + for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast<LoadInst>(*UI); + if (LI == 0 || !LI->isSimple()) return false; + + // For now we only allow loads in the same block as the PHI. 
This is a + // common case that happens when instcombine merges two loads through a PHI. + if (LI->getParent() != BB) return false; + + // Ensure that there are no instructions between the PHI and the load that + // could store. + for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI) + if (BBI->mayWriteToMemory()) + return false; + + MaxAlign = std::max(MaxAlign, LI->getAlignment()); + } + + // Okay, we know that we have one or more loads in the same block as the PHI. + // We can transform this if it is safe to push the loads into the predecessor + // blocks. The only thing to watch out for is that we can't put a possibly + // trapping load in the predecessor if it is a critical edge. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = PN->getIncomingBlock(i); + Value *InVal = PN->getIncomingValue(i); + + // If the terminator of the predecessor has side-effects (an invoke), + // there is no safe place to put a load in the predecessor. + if (Pred->getTerminator()->mayHaveSideEffects()) + return false; + + // If the value is produced by the terminator of the predecessor + // (an invoke), there is no valid place to put a load in the predecessor. + if (Pred->getTerminator() == InVal) + return false; + + // If the predecessor has a single successor, then the edge isn't critical. + if (Pred->getTerminator()->getNumSuccessors() == 1) + continue; + + // If this pointer is always safe to load, or if we can prove that there is + // already a load in the block, then we can move the load to the pred block. + if (InVal->isDereferenceablePointer() || + isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD)) + continue; + + return false; + } + + return true; +} + + +/// tryToMakeAllocaBePromotable - This returns true if the alloca only has +/// direct (non-volatile) loads and stores to it. If the alloca is close but +/// not quite there, this will transform the code to allow promotion. 
As such, +/// it is a non-pure predicate. +static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { + SetVector<Instruction*, SmallVector<Instruction*, 4>, + SmallPtrSet<Instruction*, 4> > InstsToRewrite; + + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + if (!LI->isSimple()) + return false; + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == AI || !SI->isSimple()) + return false; // Don't allow a store OF the AI, only INTO the AI. + continue; + } + + if (SelectInst *SI = dyn_cast<SelectInst>(U)) { + // If the condition being selected on is a constant, fold the select, yes + // this does (rarely) happen early on. + if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) { + Value *Result = SI->getOperand(1+CI->isZero()); + SI->replaceAllUsesWith(Result); + SI->eraseFromParent(); + + // This is very rare and we just scrambled the use list of AI, start + // over completely. + return tryToMakeAllocaBePromotable(AI, TD); + } + + // If it is safe to turn "load (select c, AI, ptr)" into a select of two + // loads, then we can transform this by rewriting the select. + if (!isSafeSelectToSpeculate(SI, TD)) + return false; + + InstsToRewrite.insert(SI); + continue; + } + + if (PHINode *PN = dyn_cast<PHINode>(U)) { + if (PN->use_empty()) { // Dead PHIs can be stripped. + InstsToRewrite.insert(PN); + continue; + } + + // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads + // in the pred blocks, then we can transform this by rewriting the PHI. 
+ if (!isSafePHIToSpeculate(PN, TD)) + return false; + + InstsToRewrite.insert(PN); + continue; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (onlyUsedByLifetimeMarkers(BCI)) { + InstsToRewrite.insert(BCI); + continue; + } + } + + return false; + } + + // If there are no instructions to rewrite, then all uses are load/stores and + // we're done! + if (InstsToRewrite.empty()) + return true; + + // If we have instructions that need to be rewritten for this to be promotable + // take care of it now. + for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) { + // This could only be a bitcast used by nothing but lifetime intrinsics. + for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end(); + I != E;) { + Use &U = I.getUse(); + ++I; + cast<Instruction>(U.getUser())->eraseFromParent(); + } + BCI->eraseFromParent(); + continue; + } + + if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) { + // Selects in InstsToRewrite only have load uses. Rewrite each as two + // loads with a new select. + while (!SI->use_empty()) { + LoadInst *LI = cast<LoadInst>(SI->use_back()); + + IRBuilder<> Builder(LI); + LoadInst *TrueLoad = + Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); + LoadInst *FalseLoad = + Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f"); + + // Transfer alignment and TBAA info if present. + TrueLoad->setAlignment(LI->getAlignment()); + FalseLoad->setAlignment(LI->getAlignment()); + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { + TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + } + + Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad); + V->takeName(LI); + LI->replaceAllUsesWith(V); + LI->eraseFromParent(); + } + + // Now that all the loads are gone, the select is gone too. 
+ SI->eraseFromParent(); + continue; + } + + // Otherwise, we have a PHI node which allows us to push the loads into the + // predecessors. + PHINode *PN = cast<PHINode>(InstsToRewrite[i]); + if (PN->use_empty()) { + PN->eraseFromParent(); + continue; + } + + Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); + PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), + PN->getName()+".ld", PN); + + // Get the TBAA tag and alignment to use from one of the loads. It doesn't + // matter which one we get and if any differ, it doesn't matter. + LoadInst *SomeLoad = cast<LoadInst>(PN->use_back()); + MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); + unsigned Align = SomeLoad->getAlignment(); + + // Rewrite all loads of the PN to use the new PHI. + while (!PN->use_empty()) { + LoadInst *LI = cast<LoadInst>(PN->use_back()); + LI->replaceAllUsesWith(NewPN); + LI->eraseFromParent(); + } + + // Inject loads into all of the pred blocks. Keep track of which blocks we + // insert them into in case we have multiple edges from the same block. + DenseMap<BasicBlock*, LoadInst*> InsertedLoads; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = PN->getIncomingBlock(i); + LoadInst *&Load = InsertedLoads[Pred]; + if (Load == 0) { + Load = new LoadInst(PN->getIncomingValue(i), + PN->getName() + "." 
+ Pred->getName(), + Pred->getTerminator()); + Load->setAlignment(Align); + if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); + } + + NewPN->addIncoming(Load, Pred); + } + + PN->eraseFromParent(); + } + + ++NumAdjusted; + return true; +} bool SROA::performPromotion(Function &F) { std::vector<AllocaInst*> Allocas; - DominatorTree &DT = getAnalysis<DominatorTree>(); - DominanceFrontier &DF = getAnalysis<DominanceFrontier>(); + DominatorTree *DT = 0; + if (HasDomTree) + DT = &getAnalysis<DominatorTree>(); BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function - + DIBuilder DIB(*F.getParent()); bool Changed = false; - + SmallVector<Instruction*, 64> Insts; while (1) { Allocas.clear(); @@ -772,12 +1303,26 @@ // the entry node for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? - if (isAllocaPromotable(AI)) + if (tryToMakeAllocaBePromotable(AI, TD)) Allocas.push_back(AI); if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DF); + if (HasDomTree) + PromoteMemToReg(Allocas, *DT); + else { + SSAUpdater SSA; + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { + AllocaInst *AI = Allocas[i]; + + // Build list of instructions to promote. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) + Insts.push_back(cast<Instruction>(*UI)); + AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts); + Insts.clear(); + } + } NumPromoted += Allocas.size(); Changed = true; } @@ -789,21 +1334,21 @@ /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for /// SROA. It must be a struct or array type with a small number of elements. static bool ShouldAttemptScalarRepl(AllocaInst *AI) { - const Type *T = AI->getAllocatedType(); + Type *T = AI->getAllocatedType(); // Do not promote any struct into more than 32 separate vars. 
- if (const StructType *ST = dyn_cast<StructType>(T)) + if (StructType *ST = dyn_cast<StructType>(T)) return ST->getNumElements() <= 32; // Arrays are much less likely to be safe for SROA; only consider // them if they are very small. - if (const ArrayType *AT = dyn_cast<ArrayType>(T)) + if (ArrayType *AT = dyn_cast<ArrayType>(T)) return AT->getNumElements() <= 8; return false; } // performScalarRepl - This algorithm is a simple worklist driven algorithm, -// which runs on all of the malloc/alloca instructions in the function, removing -// them if they are only used by getelementptr instructions. +// which runs on all of the alloca instructions in the function, removing them +// if they are only used by getelementptr instructions. // bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; @@ -819,7 +1364,7 @@ while (!WorkList.empty()) { AllocaInst *AI = WorkList.back(); WorkList.pop_back(); - + // Handle dead allocas trivially. These can be formed by SROA'ing arrays // with unused elements. if (AI->use_empty()) { @@ -831,24 +1376,27 @@ // If this alloca is impossible for us to promote, reject it early. if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) continue; - + // Check to see if this allocation is only modified by a memcpy/memmove from // a constant global. If this is the case, we can change all users to use // the constant global instead. This is commonly produced by the CFE by // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. 
- if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); - Constant *TheSrc = cast<Constant>(TheCopy->getSource()); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + ToDelete[i]->eraseFromParent(); + Constant *TheSrc = cast<Constant>(Copy->getSource()); AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); - TheCopy->eraseFromParent(); // Don't mutate the global. + Copy->eraseFromParent(); // Don't mutate the global. AI->eraseFromParent(); ++NumGlobals; Changed = true; continue; } - + // Check to see if we can perform the core SROA transformation. We cannot // transform the allocation instruction if it is an array allocation // (allocations OF arrays are ok though), and an allocation of a scalar @@ -857,10 +1405,10 @@ // Do not promote [0 x %struct]. if (AllocaSize == 0) continue; - + // Do not promote any struct whose size is too big. if (AllocaSize > SRThreshold) continue; - + // If the alloca looks like a good candidate for scalar replacement, and if // all its users can be transformed, then split up the aggregate into its // separate elements. @@ -883,8 +1431,8 @@ ++NumConverted; Changed = true; continue; - } - + } + // Otherwise, couldn't process this alloca. } @@ -893,26 +1441,26 @@ /// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl /// predicate, do SROA now. 
-void SROA::DoScalarReplacement(AllocaInst *AI, +void SROA::DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList) { DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); SmallVector<AllocaInst*, 32> ElementAllocas; - if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { + if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, + AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, AI->getAlignment(), - AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } } else { - const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); + ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); ElementAllocas.reserve(AT->getNumElements()); - const Type *ElTy = AT->getElementType(); + Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), - AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } @@ -948,48 +1496,110 @@ I->eraseFromParent(); } } - + /// isSafeForScalarRepl - Check if instruction I is a safe use with regard to /// performing scalar replacement of alloca AI. The results are flagged in /// the Info parameter. Offset indicates the position within AI that is /// referenced by this instruction. 
-void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, +void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info) { for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { - isSafeForScalarRepl(BC, AI, Offset, Info); + isSafeForScalarRepl(BC, Offset, Info); } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { uint64_t GEPOffset = Offset; - isSafeGEP(GEPI, AI, GEPOffset, Info); + isSafeGEP(GEPI, GEPOffset, Info); if (!Info.isUnsafe) - isSafeForScalarRepl(GEPI, AI, GEPOffset, Info); + isSafeForScalarRepl(GEPI, GEPOffset, Info); } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); - if (Length) - isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, - UI.getOperandNo() == 0, Info); - else - MarkUnsafe(Info); + if (Length == 0) + return MarkUnsafe(Info, User); + isSafeMemAccess(Offset, Length->getZExtValue(), 0, + UI.getOperandNo() == 0, Info, MI, + true /*AllowWholeAccess*/); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - if (!LI->isVolatile()) { - const Type *LIType = LI->getType(); - isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType), - LIType, false, Info); - } else - MarkUnsafe(Info); + if (!LI->isSimple()) + return MarkUnsafe(Info, User); + Type *LIType = LI->getType(); + isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), + LIType, false, Info, LI, true /*AllowWholeAccess*/); + Info.hasALoadOrStore = true; + } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Store is ok if storing INTO the pointer, not storing the pointer - if (!SI->isVolatile() && SI->getOperand(0) != I) { - const Type *SIType = SI->getOperand(0)->getType(); - isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType), - SIType, true, Info); - } else - MarkUnsafe(Info); + if (!SI->isSimple() || SI->getOperand(0) 
== I) + return MarkUnsafe(Info, User); + + Type *SIType = SI->getOperand(0)->getType(); + isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), + SIType, true, Info, SI, true /*AllowWholeAccess*/); + Info.hasALoadOrStore = true; + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return MarkUnsafe(Info, User); + } else if (isa<PHINode>(User) || isa<SelectInst>(User)) { + isSafePHISelectUseForScalarRepl(User, Offset, Info); } else { - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - MarkUnsafe(Info); + return MarkUnsafe(Info, User); + } + if (Info.isUnsafe) return; + } +} + + +/// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer +/// derived from the alloca, we can often still split the alloca into elements. +/// This is useful if we have a large alloca where one element is phi'd +/// together somewhere: we can SRoA and promote all the other elements even if +/// we end up not being able to promote this one. +/// +/// All we require is that the uses of the PHI do not index into other parts of +/// the alloca. The most important use case for this is single load and stores +/// that are PHI'd together, which can happen due to code sinking. +void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, + AllocaInfo &Info) { + // If we've already checked this PHI, don't do it again. + if (PHINode *PN = dyn_cast<PHINode>(I)) + if (!Info.CheckedPHIs.insert(PN)) + return; + + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { + isSafePHISelectUseForScalarRepl(BC, Offset, Info); + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { + // Only allow "bitcast" GEPs for simplicity. 
We could generalize this, + // but would have to prove that we're staying inside of an element being + // promoted. + if (!GEPI->hasAllZeroIndices()) + return MarkUnsafe(Info, User); + isSafePHISelectUseForScalarRepl(GEPI, Offset, Info); + } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + if (!LI->isSimple()) + return MarkUnsafe(Info, User); + Type *LIType = LI->getType(); + isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), + LIType, false, Info, LI, false /*AllowWholeAccess*/); + Info.hasALoadOrStore = true; + + } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Store is ok if storing INTO the pointer, not storing the pointer + if (!SI->isSimple() || SI->getOperand(0) == I) + return MarkUnsafe(Info, User); + + Type *SIType = SI->getOperand(0)->getType(); + isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), + SIType, true, Info, SI, false /*AllowWholeAccess*/); + Info.hasALoadOrStore = true; + } else if (isa<PHINode>(User) || isa<SelectInst>(User)) { + isSafePHISelectUseForScalarRepl(User, Offset, Info); + } else { + return MarkUnsafe(Info, User); } if (Info.isUnsafe) return; } @@ -1000,7 +1610,7 @@ /// references, and when the resulting offset corresponds to an element within /// the alloca type. The results are flagged in the Info parameter. Upon /// return, Offset is adjusted as specified by the GEP indices. -void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, +void SROA::isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info) { gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); if (GEPIt == E) @@ -1015,62 +1625,113 @@ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); if (!IdxVal) - return MarkUnsafe(Info); + return MarkUnsafe(Info, GEPI); } // Compute the offset due to this GEP and check if the alloca has a // component element at that offset. 
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); - if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) - MarkUnsafe(Info); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); + if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) + MarkUnsafe(Info, GEPI); +} + +/// isHomogeneousAggregate - Check if type T is a struct or array containing +/// elements of the same type (which is always true for arrays). If so, +/// return true with NumElts and EltTy set to the number of elements and the +/// element type, respectively. +static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, + Type *&EltTy) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) { + NumElts = AT->getNumElements(); + EltTy = (NumElts == 0 ? 0 : AT->getElementType()); + return true; + } + if (StructType *ST = dyn_cast<StructType>(T)) { + NumElts = ST->getNumContainedTypes(); + EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); + for (unsigned n = 1; n < NumElts; ++n) { + if (ST->getContainedType(n) != EltTy) + return false; + } + return true; + } + return false; +} + +/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are +/// "homogeneous" aggregates with the same element type and number of elements. +static bool isCompatibleAggregate(Type *T1, Type *T2) { + if (T1 == T2) + return true; + + unsigned NumElts1, NumElts2; + Type *EltTy1, *EltTy2; + if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && + isHomogeneousAggregate(T2, NumElts2, EltTy2) && + NumElts1 == NumElts2 && + EltTy1 == EltTy2) + return true; + + return false; } /// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI /// alloca or has an offset and size that corresponds to a component element /// within it. The offset checked here may have been formed from a GEP with a /// pointer bitcasted to a different type. 
-void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, - const Type *MemOpType, bool isStore, - AllocaInfo &Info) { +/// +/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a +/// unit. If false, it only allows accesses known to be in a single element. +void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, + Type *MemOpType, bool isStore, + AllocaInfo &Info, Instruction *TheAccess, + bool AllowWholeAccess) { // Check if this is a load/store of the entire alloca. - if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { - bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); - // This is safe for MemIntrinsics (where MemOpType is 0), integer types - // (which are essentially the same as the MemIntrinsics, especially with - // regard to copying padding between elements), or references using the - // aggregate type of the alloca. - if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) { - if (!UsesAggregateType) { - if (isStore) - Info.isMemCpyDst = true; - else - Info.isMemCpySrc = true; - } + if (Offset == 0 && AllowWholeAccess && + MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) { + // This can be safe for MemIntrinsics (where MemOpType is 0) and integer + // loads/stores (which are essentially the same as the MemIntrinsics with + // regard to copying padding between elements). But, if an alloca is + // flagged as both a source and destination of such operations, we'll need + // to check later for padding between elements. + if (!MemOpType || MemOpType->isIntegerTy()) { + if (isStore) + Info.isMemCpyDst = true; + else + Info.isMemCpySrc = true; + return; + } + // This is also safe for references using a type that is compatible with + // the type of the alloca, so that loads/stores can be rewritten using + // insertvalue/extractvalue. 
+ if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) { + Info.hasSubelementAccess = true; return; } } // Check if the offset/size correspond to a component within the alloca type. - const Type *T = AI->getAllocatedType(); - if (TypeHasComponent(T, Offset, MemSize)) + Type *T = Info.AI->getAllocatedType(); + if (TypeHasComponent(T, Offset, MemSize)) { + Info.hasSubelementAccess = true; return; + } - return MarkUnsafe(Info); + return MarkUnsafe(Info, TheAccess); } /// TypeHasComponent - Return true if T has a component type with the /// specified offset and size. If Size is zero, do not check the size. -bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { - const Type *EltTy; +bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { + Type *EltTy; uint64_t EltSize; - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { const StructLayout *Layout = TD->getStructLayout(ST); unsigned EltIdx = Layout->getElementContainingOffset(Offset); EltTy = ST->getContainedType(EltIdx); EltSize = TD->getTypeAllocSize(EltTy); Offset -= Layout->getElementOffset(EltIdx); - } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { + } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { EltTy = AT->getElementType(); EltSize = TD->getTypeAllocSize(EltTy); if (Offset >= AT->getNumElements() * EltSize) @@ -1093,14 +1754,21 @@ /// instruction. 
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts) { - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { - Instruction *User = cast<Instruction>(*UI); + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI++); if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { RewriteBitCast(BC, AI, Offset, NewElts); - } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { + continue; + } + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { RewriteGEP(GEPI, AI, Offset, NewElts); - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { + continue; + } + + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); uint64_t MemSize = Length->getZExtValue(); if (Offset == 0 && @@ -1108,9 +1776,21 @@ RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); // Otherwise the intrinsic can only touch a single element and the // address operand will be updated, so nothing else needs to be done. 
- } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - const Type *LIType = LI->getType(); - if (LIType == AI->getAllocatedType()) { + continue; + } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + RewriteLifetimeIntrinsic(II, AI, Offset, NewElts); + } + continue; + } + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + Type *LIType = LI->getType(); + + if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { // Replace: // %res = load { i32, i32 }* %alloc // with: @@ -1120,9 +1800,10 @@ // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 // (Also works for arrays instead of structs) Value *Insert = UndefValue::get(LIType); + IRBuilder<> Builder(LI); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Load = new LoadInst(NewElts[i], LI); - Insert = InsertValueInst::Create(Insert, Load, i, LI); + Value *Load = Builder.CreateLoad(NewElts[i], "load"); + Insert = Builder.CreateInsertValue(Insert, Load, i, "insert"); } LI->replaceAllUsesWith(Insert); DeadInsts.push_back(LI); @@ -1132,10 +1813,13 @@ // If this is a load of the entire alloca to an integer, rewrite it. 
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); } - } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { Value *Val = SI->getOperand(0); - const Type *SIType = Val->getType(); - if (SIType == AI->getAllocatedType()) { + Type *SIType = Val->getType(); + if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { // Replace: // store { i32, i32 } %val, { i32, i32 }* %alloc // with: @@ -1144,9 +1828,10 @@ // %val.1 = extractvalue { i32, i32 } %val, 1 // store i32 %val.1, i32* %alloc.1 // (Also works for arrays instead of structs) + IRBuilder<> Builder(SI); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, SI); - new StoreInst(Extract, NewElts[i], SI); + Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName()); + Builder.CreateStore(Extract, NewElts[i]); } DeadInsts.push_back(SI); } else if (SIType->isIntegerTy() && @@ -1155,6 +1840,26 @@ // If this is a store of the entire alloca from an integer, rewrite it. RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); } + continue; + } + + if (isa<SelectInst>(User) || isa<PHINode>(User)) { + // If we have a PHI user of the alloca itself (as opposed to a GEP or + // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to + // the new pointer. + if (!isa<AllocaInst>(I)) continue; + + assert(Offset == 0 && NewElts[0] && + "Direct alloca use should have a zero offset"); + + // If we have a use of the alloca, we know the derived uses will be + // utilizing just the first element of the scalarized result. Insert a + // bitcast of the first alloca before the user as required. + AllocaInst *NewAI = NewElts[0]; + BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI); + NewAI->moveBefore(BCI); + TheUse = BCI; + continue; } } } @@ -1171,7 +1876,7 @@ // references to the first new element alloca. 
Instruction *Val = NewElts[0]; if (Val->getType() != BC->getDestTy()) { - Val = new BitCastInst(Val, BC->getDestTy(), BC); + Val = new BitCastInst(Val, BC->getDestTy(), "", BC); Val->takeName(BC); } BC->replaceAllUsesWith(Val); @@ -1183,10 +1888,10 @@ /// Sets T to the type of the element and Offset to the offset within that /// element. IdxTy is set to the type of the index result to be used in a /// GEP instruction. -uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, - const Type *&IdxTy) { +uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, + Type *&IdxTy) { uint64_t Idx = 0; - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { const StructLayout *Layout = TD->getStructLayout(ST); Idx = Layout->getElementContainingOffset(Offset); T = ST->getContainedType(Idx); @@ -1194,7 +1899,7 @@ IdxTy = Type::getInt32Ty(T->getContext()); return Idx; } - const ArrayType *AT = cast<ArrayType>(T); + ArrayType *AT = cast<ArrayType>(T); T = AT->getElementType(); uint64_t EltSize = TD->getTypeAllocSize(T); Idx = Offset / EltSize; @@ -1210,13 +1915,12 @@ SmallVector<AllocaInst*, 32> &NewElts) { uint64_t OldOffset = Offset; SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); RewriteForScalarRepl(GEPI, AI, Offset, NewElts); - const Type *T = AI->getAllocatedType(); - const Type *IdxTy; + Type *T = AI->getAllocatedType(); + Type *IdxTy; uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy); if (GEPI->getOperand(0) == AI) OldIdx = ~0ULL; // Force the GEP to be rewritten. 
@@ -1230,7 +1934,7 @@ if (Idx == OldIdx) return; - const Type *i32Ty = Type::getInt32Ty(AI->getContext()); + Type *i32Ty = Type::getInt32Ty(AI->getContext()); SmallVector<Value*, 8> NewArgs; NewArgs.push_back(Constant::getNullValue(i32Ty)); while (EltOffset != 0) { @@ -1239,16 +1943,71 @@ } Instruction *Val = NewElts[Idx]; if (NewArgs.size() > 1) { - Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), - NewArgs.end(), GEPI); + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI); Val->takeName(GEPI); } if (Val->getType() != GEPI->getType()) - Val = new BitCastInst(Val, GEPI->getType(), GEPI); + Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI); GEPI->replaceAllUsesWith(Val); DeadInsts.push_back(GEPI); } +/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it +/// to mark the lifetime of the scalarized memory. +void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, + uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts) { + ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0)); + // Put matching lifetime markers on everything from Offset up to + // Offset+OldSize. + Type *AIType = AI->getAllocatedType(); + uint64_t NewOffset = Offset; + Type *IdxTy; + uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy); + + IRBuilder<> Builder(II); + uint64_t Size = OldSize->getLimitedValue(); + + if (NewOffset) { + // Splice the first element and index 'NewOffset' bytes in. SROA will + // split the alloca again later. 
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy()); + V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); + + IdxTy = NewElts[Idx]->getAllocatedType(); + uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset; + if (EltSize > Size) { + EltSize = Size; + Size = 0; + } else { + Size -= EltSize; + } + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize)); + else + Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize)); + ++Idx; + } + + for (; Idx != NewElts.size() && Size; ++Idx) { + IdxTy = NewElts[Idx]->getAllocatedType(); + uint64_t EltSize = TD->getTypeAllocSize(IdxTy); + if (EltSize > Size) { + EltSize = Size; + Size = 0; + } else { + Size -= EltSize; + } + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Builder.CreateLifetimeStart(NewElts[Idx], + Builder.getInt64(EltSize)); + else + Builder.CreateLifetimeEnd(NewElts[Idx], + Builder.getInt64(EltSize)); + } + DeadInsts.push_back(II); +} + /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, @@ -1282,7 +2041,7 @@ // function is only called for mem intrinsics that access the whole // aggregate, so non-zero GEPs are not an issue here.) OtherPtr = OtherPtr->stripPointerCasts(); - + // Copying the alloca to itself is a no-op: just delete it. if (OtherPtr == AI || OtherPtr == NewElts[0]) { // This code will run twice for a no-op memcpy -- once for each operand. @@ -1293,43 +2052,42 @@ DeadInsts.push_back(MI); return; } - + // If the pointer is not the right type, insert a bitcast to the right // type. 
- const Type *NewTy = + Type *NewTy = PointerType::get(AI->getType()->getElementType(), AddrSpace); - + if (OtherPtr->getType() != NewTy) - OtherPtr = new BitCastInst(OtherPtr, NewTy, MI); + OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI); } - + // Process each element of the aggregate. - Value *TheFn = MI->getCalledValue(); - const Type *BytePtrTy = MI->getRawDest()->getType(); bool SROADest = MI->getRawDest() == Inst; - + Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. Value *OtherElt = 0; unsigned OtherEltAlign = MemAlignment; - + if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, + OtherPtr->getName()+"."+Twine(i), MI); uint64_t EltOffset; - const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); - const Type *OtherTy = OtherPtrTy->getElementType(); - if (const StructType *ST = dyn_cast<StructType>(OtherTy)) { + PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); + Type *OtherTy = OtherPtrTy->getElementType(); + if (StructType *ST = dyn_cast<StructType>(OtherTy)) { EltOffset = TD->getStructLayout(ST)->getElementOffset(i); } else { - const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); + Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); EltOffset = TD->getTypeAllocSize(EltTy)*i; } - + // The alignment of the other pointer is the guaranteed alignment of the // element, which is affected by both the known alignment of the whole // mem intrinsic and the alignment of the element. If the alignment of @@ -1337,26 +2095,26 @@ // known alignment is just 4 bytes. 
OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset); } - + Value *EltPtr = NewElts[i]; - const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); - + Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); + // If we got down to a scalar, insert a load or store as appropriate. if (EltTy->isSingleValueType()) { if (isa<MemTransferInst>(MI)) { if (SROADest) { // From Other to Alloca. - Value *Elt = new LoadInst(OtherElt, false, OtherEltAlign, MI); + Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI); new StoreInst(Elt, EltPtr, MI); } else { // From Alloca to Other. - Value *Elt = new LoadInst(EltPtr, MI); + Value *Elt = new LoadInst(EltPtr, "tmp", MI); new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI); } continue; } assert(isa<MemSetInst>(MI)); - + // If the stored element is zero (common case), just store a null // constant. Constant *StoreVal; @@ -1365,7 +2123,7 @@ StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. - const Type *ValTy = EltTy->getScalarType(); + Type *ValTy = EltTy->getScalarType(); // Construct an integer with the right value. unsigned EltSize = TD->getTypeSizeInBits(ValTy); @@ -1376,7 +2134,7 @@ TotalVal = TotalVal.shl(8); TotalVal |= OneVal; } - + // Convert the integer value to the appropriate type. StoreVal = ConstantInt::get(CI->getContext(), TotalVal); if (ValTy->isPointerTy()) @@ -1384,12 +2142,12 @@ else if (ValTy->isFloatingPointTy()) StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); - + // If the requested value was a vector constant, create it. 
- if (EltTy != ValTy) { - unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); + if (EltTy->isVectorTy()) { + unsigned NumElts = cast<VectorType>(EltTy)->getNumElements(); SmallVector<Constant*, 16> Elts(NumElts, StoreVal); - StoreVal = ConstantVector::get(&Elts[0], NumElts); + StoreVal = ConstantVector::get(Elts); } } new StoreInst(StoreVal, EltPtr, MI); @@ -1398,54 +2156,24 @@ // Otherwise, if we're storing a byte variable, use a memset call for // this element. } - - // Cast the element pointer to BytePtrTy. - if (EltPtr->getType() != BytePtrTy) - EltPtr = new BitCastInst(EltPtr, BytePtrTy, MI); - - // Cast the other pointer (if we have one) to BytePtrTy. - if (OtherElt && OtherElt->getType() != BytePtrTy) { - // Preserve address space of OtherElt - const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType()); - const PointerType* PTy = cast<PointerType>(BytePtrTy); - if (OtherPTy->getElementType() != PTy->getElementType()) { - Type *NewOtherPTy = PointerType::get(PTy->getElementType(), - OtherPTy->getAddressSpace()); - OtherElt = new BitCastInst(OtherElt, NewOtherPTy, MI); - } - } - + unsigned EltSize = TD->getTypeAllocSize(EltTy); - + + IRBuilder<> Builder(MI); + // Finally, insert the meminst for this element. - if (isa<MemTransferInst>(MI)) { - Value *Ops[] = { - SROADest ? EltPtr : OtherElt, // Dest ptr - SROADest ? OtherElt : EltPtr, // Src ptr - ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size - // Align - ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign), - MI->getVolatileCst() - }; - // In case we fold the address space overloaded memcpy of A to B - // with memcpy of B to C, change the function to be a memcpy of A to C. 
- const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(), - Ops[2]->getType() }; - Module *M = MI->getParent()->getParent()->getParent(); - TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3); - CallInst::Create(TheFn, Ops, Ops + 5, "", MI); + if (isa<MemSetInst>(MI)) { + Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize, + MI->isVolatile()); } else { - assert(isa<MemSetInst>(MI)); - Value *Ops[] = { - EltPtr, MI->getArgOperand(1), // Dest, Value, - ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size - Zero, // Align - ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile - }; - const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; - Module *M = MI->getParent()->getParent()->getParent(); - TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); - CallInst::Create(TheFn, Ops, Ops + 5, "", MI); + assert(isa<MemTransferInst>(MI)); + Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr + Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr + + if (isa<MemCpyInst>(MI)) + Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile()); + else + Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile()); } } DeadInsts.push_back(MI); @@ -1459,113 +2187,110 @@ // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. 
Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getAllocatedType(); + Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + + IRBuilder<> Builder(SI); // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) - SrcVal = new ZExtInst(SrcVal, - IntegerType::get(SI->getContext(), AllocaSizeBits), - SI); + SrcVal = Builder.CreateZExt(SrcVal, + IntegerType::get(SI->getContext(), AllocaSizeBits)); DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI << '\n'); // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. - if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { const StructLayout *Layout = TD->getStructLayout(EltSTy); - + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Get the number of bits to shift SrcVal to get the value. - const Type *FieldTy = EltSTy->getElementType(i); + Type *FieldTy = EltSTy->getElementType(i); uint64_t Shift = Layout->getElementOffsetInBits(i); - + if (TD->isBigEndian()) Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy); - + Value *EltVal = SrcVal; if (Shift) { Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); - EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, SI); + EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); } - + // Truncate down to an integer of the right size. uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); - + // Ignore zero sized fields like {}, they obviously contain no data. 
if (FieldSizeBits == 0) continue; - + if (FieldSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, - IntegerType::get(SI->getContext(), FieldSizeBits), - SI); + EltVal = Builder.CreateTrunc(EltVal, + IntegerType::get(SI->getContext(), FieldSizeBits)); Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). - EltVal = new BitCastInst(EltVal, FieldTy, SI); + EltVal = Builder.CreateBitCast(EltVal, FieldTy); } else { // Otherwise, bitcast the dest pointer (for aggregates). - DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), - SI); + DestField = Builder.CreateBitCast(DestField, + PointerType::getUnqual(EltVal->getType())); } new StoreInst(EltVal, DestField, SI); } - + } else { - const ArrayType *ATy = cast<ArrayType>(AllocaEltTy); - const Type *ArrayEltTy = ATy->getElementType(); + ArrayType *ATy = cast<ArrayType>(AllocaEltTy); + Type *ArrayEltTy = ATy->getElementType(); uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); uint64_t Shift; - + if (TD->isBigEndian()) Shift = AllocaSizeBits-ElementOffset; - else + else Shift = 0; - + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Ignore zero sized fields like {}, they obviously contain no data. if (ElementSizeBits == 0) continue; - + Value *EltVal = SrcVal; if (Shift) { Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); - EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, - SI); + EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); } - + // Truncate down to an integer of the right size. 
if (ElementSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, - IntegerType::get(SI->getContext(), - ElementSizeBits), SI); + EltVal = Builder.CreateTrunc(EltVal, + IntegerType::get(SI->getContext(), + ElementSizeBits)); Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. } else if (ArrayEltTy->isFloatingPointTy() || ArrayEltTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). - EltVal = new BitCastInst(EltVal, ArrayEltTy, SI); + EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy); } else { // Otherwise, bitcast the dest pointer (for aggregates). - DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), - SI); + DestField = Builder.CreateBitCast(DestField, + PointerType::getUnqual(EltVal->getType())); } new StoreInst(EltVal, DestField, SI); - + if (TD->isBigEndian()) Shift -= ElementOffset; - else + else Shift += ElementOffset; } } - + DeadInsts.push_back(SI); } @@ -1575,126 +2300,121 @@ SmallVector<AllocaInst*, 32> &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getAllocatedType(); + Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - + DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); - + // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. 
const StructLayout *Layout = 0; uint64_t ArrayEltBitOffset = 0; - if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { Layout = TD->getStructLayout(EltSTy); } else { - const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); + Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); - } - - Value *ResultVal = + } + + Value *ResultVal = Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); - + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Load the value from the alloca. If the NewElt is an aggregate, cast // the pointer to an integer of the same size before doing the load. Value *SrcField = NewElts[i]; - const Type *FieldTy = + Type *FieldTy = cast<PointerType>(SrcField->getType())->getElementType(); uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); - + // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - - const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), + + IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), FieldSizeBits); if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && !FieldTy->isVectorTy()) SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), - LI); - SrcField = new LoadInst(SrcField, LI); + "", LI); + SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); // If SrcField is a fp or vector of the right size but that isn't an // integer type, bitcast to an integer so we can shift it. if (SrcField->getType() != FieldIntTy) - SrcField = new BitCastInst(SrcField, FieldIntTy, LI); + SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI); // Zero extend the field to be the same size as the final alloca so that // we can shift and insert it. 
if (SrcField->getType() != ResultVal->getType()) - SrcField = new ZExtInst(SrcField, ResultVal->getType(), LI); - + SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI); + // Determine the number of bits to shift SrcField. uint64_t Shift; if (Layout) // Struct case. Shift = Layout->getElementOffsetInBits(i); else // Array case. Shift = i*ArrayEltBitOffset; - + if (TD->isBigEndian()) Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); - + if (Shift) { Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); - SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, LI); + SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); } // Don't create an 'or x, 0' on the first iteration. if (!isa<Constant>(ResultVal) || !cast<Constant>(ResultVal)->isNullValue()) - ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, LI); + ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); else ResultVal = SrcField; } // Handle tail padding by truncating the result if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits) - ResultVal = new TruncInst(ResultVal, LI->getType(), LI); + ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); LI->replaceAllUsesWith(ResultVal); DeadInsts.push_back(LI); } /// HasPadding - Return true if the specified type has any structure or -/// alignment padding, false otherwise. -static bool HasPadding(const Type *Ty, const TargetData &TD) { - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = TD.getStructLayout(STy); - unsigned PrevFieldBitOffset = 0; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - unsigned FieldBitOffset = SL->getElementOffsetInBits(i); - - // Padding in sub-elements? - if (HasPadding(STy->getElementType(i), TD)) - return true; - - // Check to see if there is any padding between this element and the - // previous one. 
- if (i) { - unsigned PrevFieldEnd = - PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1)); - if (PrevFieldEnd < FieldBitOffset) - return true; - } - - PrevFieldBitOffset = FieldBitOffset; - } - - // Check for tail padding. - if (unsigned EltCount = STy->getNumElements()) { - unsigned PrevFieldEnd = PrevFieldBitOffset + - TD.getTypeSizeInBits(STy->getElementType(EltCount-1)); - if (PrevFieldEnd < SL->getSizeInBits()) - return true; - } - - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - return HasPadding(ATy->getElementType(), TD); - } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { - return HasPadding(VTy->getElementType(), TD); +/// alignment padding in between the elements that would be split apart +/// by SROA; return false otherwise. +static bool HasPadding(Type *Ty, const TargetData &TD) { + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Ty = ATy->getElementType(); + return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } - return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); + + // SROA currently handles only Arrays and Structs. + StructType *STy = cast<StructType>(Ty); + const StructLayout *SL = TD.getStructLayout(STy); + unsigned PrevFieldBitOffset = 0; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + unsigned FieldBitOffset = SL->getElementOffsetInBits(i); + + // Check to see if there is any padding between this element and the + // previous one. + if (i) { + unsigned PrevFieldEnd = + PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1)); + if (PrevFieldEnd < FieldBitOffset) + return true; + } + PrevFieldBitOffset = FieldBitOffset; + } + // Check for tail padding. 
+ if (unsigned EltCount = STy->getNumElements()) { + unsigned PrevFieldEnd = PrevFieldBitOffset + + TD.getTypeSizeInBits(STy->getElementType(EltCount-1)); + if (PrevFieldEnd < SL->getSizeInBits()) + return true; + } + return false; } /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of @@ -1703,14 +2423,14 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // Loop over the use list of the alloca. We can only transform it if all of // the users are safe to transform. - AllocaInfo Info; - - isSafeForScalarRepl(AI, AI, 0, Info); + AllocaInfo Info(AI); + + isSafeForScalarRepl(AI, 0, Info); if (Info.isUnsafe) { DEBUG(dbgs() << "Cannot transform: " << *AI << '\n'); return false; } - + // Okay, we know all the users are promotable. If the aggregate is a memcpy // source and destination, we have to be careful. In particular, the memcpy // could be moving around elements that live in structure padding of the LLVM @@ -1720,6 +2440,20 @@ HasPadding(AI->getAllocatedType(), *TD)) return false; + // If the alloca never has an access to just *part* of it, but is accessed + // via loads and stores, then we should use ConvertToScalarInfo to promote + // the alloca instead of promoting each piece at a time and inserting fission + // and fusion code. + if (!Info.hasSubelementAccess && Info.hasALoadOrStore) { + // If the struct/array just has one element, use basic SRoA. 
+ if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { + if (ST->getNumElements() > 1) return false; + } else { + if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1) + return false; + } + } + return true; } @@ -1732,7 +2466,7 @@ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return GV->isConstant(); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::BitCast || + if (CE->getOpcode() == Instruction::BitCast || CE->getOpcode() == Instruction::GetElementPtr) return PointsToConstantGlobal(CE->getOperand(0)); return false; @@ -1743,21 +2477,29 @@ /// see any stores or other unknown uses. If we see pointer arithmetic, keep /// track of whether it moves the pointer (with isOffset) but otherwise traverse /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to -/// the alloca, and if the source pointer is a pointer to a constant global, we +/// the alloca, and if the source pointer is a pointer to a constant global, we /// can optimize this. -static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - bool isOffset) { +static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + bool isOffset, + SmallVector<Instruction *, 4> &LifetimeMarkers) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { User *U = cast<Instruction>(*UI); - if (LoadInst *LI = dyn_cast<LoadInst>(U)) + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { // Ignore non-volatile loads, they are always ok. - if (!LI->isVolatile()) - continue; - + if (!LI->isSimple()) return false; + continue; + } + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { // If uses of the bitcast are ok, we are ok. 
- if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) + if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset, + LifetimeMarkers)) return false; continue; } @@ -1765,31 +2507,70 @@ // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, - isOffset || !GEP->hasAllZeroIndices())) + isOffset || !GEP->hasAllZeroIndices(), + LifetimeMarkers)) return false; continue; } - + + if (CallSite CS = U) { + // If this is the function being called then we treat it like a load and + // ignore it. + if (CS.isCallee(UI)) + continue; + + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + unsigned ArgNo = CS.getArgumentNo(UI); + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || + CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))) + continue; + + // If this is being passed as a byval argument, the caller is making a + // copy, so it is only a read of the alloca. + if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal)) + continue; + } + + // Lifetime intrinsics can be handled by the caller. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + assert(II->use_empty() && "Lifetime markers have no result to use!"); + LifetimeMarkers.push_back(II); + continue; + } + } + // If this is isn't our memcpy/memmove, reject it as something we can't // handle. MemTransferInst *MI = dyn_cast<MemTransferInst>(U); if (MI == 0) return false; + // If the transfer is using the alloca as a source of the transfer, then + // ignore it since it is a load (unless the transfer is volatile). + if (UI.getOperandNo() == 1) { + if (MI->isVolatile()) return false; + continue; + } + // If we already have seen a copy, reject the second one. 
if (TheCopy) return false; - + // If the pointer has been offset from the start of the alloca, we can't // safely handle this. if (isOffset) return false; // If the memintrinsic isn't using the alloca as the dest, reject it. if (UI.getOperandNo() != 0) return false; - + // If the source of the memcpy/move is not a constant global, reject it. if (!PointsToConstantGlobal(MI->getSource())) return false; - + // Otherwise, the transform is safe. Remember the copy instruction. TheCopy = MI; } @@ -1799,9 +2580,11 @@ /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only /// modified by a copy from a constant global. If we can prove this, we can /// replace any uses of the alloca with uses of the global directly. -MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) { +MemTransferInst * +SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI, + SmallVector<Instruction*, 4> &ToDelete) { MemTransferInst *TheCopy = 0; - if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false)) + if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete)) return TheCopy; return 0; }
diff --git a/src/LLVM/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/src/LLVM/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 7a28f4e..a66b3e3 100644 --- a/src/LLVM/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/src/LLVM/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -42,7 +42,9 @@ namespace { struct CFGSimplifyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGSimplifyPass() : FunctionPass(ID) {} + CFGSimplifyPass() : FunctionPass(ID) { + initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); }; @@ -50,7 +52,7 @@ char CFGSimplifyPass::ID = 0; INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg", - "Simplify the CFG", false, false); + "Simplify the CFG", false, false) // Public interface to the CFGSimplification pass FunctionPass *llvm::createCFGSimplificationPass() { @@ -71,7 +73,8 @@ if (UseLLVMTrap) { Function *TrapFn = Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst::Create(TrapFn, "", I); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); } new UnreachableInst(I->getContext(), I); @@ -84,6 +87,25 @@ } } +/// ChangeToCall - Convert the specified invoke into a normal call. +static void ChangeToCall(InvokeInst *II) { + BasicBlock *BB = II->getParent(); + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Follow the call by a branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Update PHI nodes in the unwind destination + II->getUnwindDest()->removePredecessor(BB); + BB->getInstList().erase(II); +} + static bool MarkAliveBlocks(BasicBlock *BB, SmallPtrSet<BasicBlock*, 128> &Reachable) { @@ -134,7 +156,14 @@ } } - Changed |= ConstantFoldTerminator(BB); + // Turn invokes that call 'nounwind' functions into ordinary calls. 
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) + if (II->doesNotThrow()) { + ChangeToCall(II); + Changed = true; + } + + Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) Worklist.push_back(*SI); } while (!Worklist.empty()); @@ -198,9 +227,9 @@ BasicBlock::iterator I = Ret; --I; // Skip over debug info. - while (ISA_DEBUG_INFO_INTRINSIC(I) && I != BB.begin()) + while (isa<DbgInfoIntrinsic>(I) && I != BB.begin()) --I; - if (!ISA_DEBUG_INFO_INTRINSIC(I) && + if (!isa<DbgInfoIntrinsic>(I) && (!isa<PHINode>(I) || I != BB.begin() || Ret->getNumOperands() == 0 || Ret->getOperand(0) != I)) @@ -231,11 +260,12 @@ PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); if (RetBlockPHI == 0) { Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); + pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), + std::distance(PB, PE), "merge", &RetBlock->front()); - for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock); - PI != E; ++PI) + for (pred_iterator PI = PB; PI != PE; ++PI) RetBlockPHI->addIncoming(InVal, *PI); RetBlock->getTerminator()->setOperand(0, RetBlockPHI); } @@ -259,10 +289,9 @@ while (LocalChange) { LocalChange = false; - // Loop over all of the basic blocks (except the first one) and remove them - // if they are unneeded... + // Loop over all of the basic blocks and remove them if they are unneeded... // - for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) { + for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { if (SimplifyCFG(BBIt++, TD)) { LocalChange = true; ++NumSimpl;
diff --git a/src/LLVM/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/src/LLVM/lib/Transforms/Scalar/SimplifyLibCalls.cpp new file mode 100644 index 0000000..fbb9465 --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -0,0 +1,2396 @@ +//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple pass that applies a variety of small +// optimizations for calls to specific well-known function calls (e.g. runtime +// library functions). Any optimization that takes the very simple form +// "replace call to library function with simpler code that provides the same +// result" belongs in this file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "simplify-libcalls" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! +using namespace llvm; + +STATISTIC(NumSimplified, "Number of library calls simplified"); +STATISTIC(NumAnnotated, "Number of attributes added to library functions"); + +//===----------------------------------------------------------------------===// +// Optimizer Base Class +//===----------------------------------------------------------------------===// + +/// This class is the abstract base class for the set of optimizations that +/// corresponds to one library call. 
+namespace { +class LibCallOptimization { +protected: + Function *Caller; + const TargetData *TD; + const TargetLibraryInfo *TLI; + LLVMContext* Context; +public: + LibCallOptimization() { } + virtual ~LibCallOptimization() {} + + /// CallOptimizer - This pure virtual method is implemented by base classes to + /// do various optimizations. If this returns null then no transformation was + /// performed. If it returns CI, then it transformed the call and CI is to be + /// deleted. If it returns something else, replace CI with the new value and + /// delete CI. + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) + =0; + + Value *OptimizeCall(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, IRBuilder<> &B) { + Caller = CI->getParent()->getParent(); + this->TD = TD; + this->TLI = TLI; + if (CI->getCalledFunction()) + Context = &CI->getCalledFunction()->getContext(); + + // We never change the calling convention. + if (CI->getCallingConv() != llvm::CallingConv::C) + return NULL; + + return CallOptimizer(CI->getCalledFunction(), CI, B); + } +}; +} // End anonymous namespace. + + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality()) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. 
+ return false; + } + return true; +} + +static bool CallHasFloatingPointArgument(const CallInst *CI) { + for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); + it != e; ++it) { + if ((*it)->getType()->isFloatingPointTy()) + return true; + } + return false; +} + +/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. +static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + +//===----------------------------------------------------------------------===// +// String and Memory LibCall Optimizations +//===----------------------------------------------------------------------===// + +//===---------------------------------------===// +// 'strcat' Optimizations +namespace { +struct StrCatOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + --Len; // Unbias length. + + // Handle the simple, do-nothing case: strcat(x, "") -> x + if (Len == 0) + return Dst; + + // These optimizations require TargetData. 
+ if (!TD) return 0; + + EmitStrLenMemCpy(Src, Dst, Len, B); + return Dst; + } + + void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { + // We need to find the end of the destination string. That's where the + // memory is to be moved to. We just generate a call to strlen. + Value *DstLen = EmitStrLen(Dst, B, TD); + + // Now that we have the destination's length, we must index into the + // destination's pointer to get the actual memcpy destination (end of + // the string .. we're concatenating). + Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); + + // We have enough information to now generate the memcpy call to do the + // concatenation for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(CpyDst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + } +}; + +//===---------------------------------------===// +// 'strncat' Optimizations + +struct StrNCatOpt : public StrCatOpt { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + uint64_t Len; + + // We don't do anything if length is not constant + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Len = LengthArg->getZExtValue(); + else + return 0; + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; // Unbias length. 
+ + // Handle the simple, do-nothing cases: + // strncat(x, "", c) -> x + // strncat(x, c, 0) -> x + if (SrcLen == 0 || Len == 0) return Dst; + + // These optimizations require TargetData. + if (!TD) return 0; + + // We don't optimize this case + if (Len < SrcLen) return 0; + + // strncat(x, s, c) -> strcat(x, s) + // s is constant so the strcat can be optimized further + EmitStrLenMemCpy(Src, Dst, SrcLen, B); + return Dst; + } +}; + +//===---------------------------------------===// +// 'strchr' Optimizations + +struct StrChrOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + + // If the second operand is non-constant, see if we can compute the length + // of the input string and turn this into memchr. + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (CharC == 0) { + // These optimizations require TargetData. + if (!TD) return 0; + + uint64_t Len = GetStringLength(SrcStr); + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. + return 0; + + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. + ConstantInt::get(TD->getIntPtrType(*Context), Len), + B, TD); + } + + // Otherwise, the character is a constant, see if the first argument is + // a string literal. If so, we can constant fold. + std::string Str; + if (!GetConstantStringInfo(SrcStr, Str)) + return 0; + + // strchr can find the nul character. + Str += '\0'; + + // Compute the offset. + size_t I = Str.find(CharC->getSExtValue()); + if (I == std::string::npos) // Didn't find the char. strchr returns null. 
+ return Constant::getNullValue(CI->getType()); + + // strchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strchr"); + } +}; + +//===---------------------------------------===// +// 'strrchr' Optimizations + +struct StrRChrOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strrchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + + // Cannot fold anything if we're not looking for a constant. + if (!CharC) + return 0; + + std::string Str; + if (!GetConstantStringInfo(SrcStr, Str)) { + // strrchr(s, 0) -> strchr(s, 0) + if (TD && CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, TD); + return 0; + } + + // strrchr can find the nul character. + Str += '\0'; + + // Compute the offset. + size_t I = Str.rfind(CharC->getSExtValue()); + if (I == std::string::npos) // Didn't find the char. Return null. + return Constant::getNullValue(CI->getType()); + + // strrchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr"); + } +}; + +//===---------------------------------------===// +// 'strcmp' Optimizations + +struct StrCmpOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcmp" function prototype. 
+ FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strcmp(x,x) -> 0 + return ConstantInt::get(CI->getType(), 0); + + std::string Str1, Str2; + bool HasStr1 = GetConstantStringInfo(Str1P, Str1); + bool HasStr2 = GetConstantStringInfo(Str2P, Str2); + + // strcmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) + return ConstantInt::get(CI->getType(), + StringRef(Str1).compare(Str2)); + + if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + // strcmp(P, "x") -> memcmp(P, "x", 2) + uint64_t Len1 = GetStringLength(Str1P); + uint64_t Len2 = GetStringLength(Str2P); + if (Len1 && Len2) { + // These optimizations require TargetData. + if (!TD) return 0; + + return EmitMemCmp(Str1P, Str2P, + ConstantInt::get(TD->getIntPtrType(*Context), + std::min(Len1, Len2)), B, TD); + } + + return 0; + } +}; + +//===---------------------------------------===// +// 'strncmp' Optimizations + +struct StrNCmpOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncmp" function prototype. 
+ FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strncmp(x,x,n) -> 0 + return ConstantInt::get(CI->getType(), 0); + + // Get the length argument if it is constant. + uint64_t Length; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Length = LengthArg->getZExtValue(); + else + return 0; + + if (Length == 0) // strncmp(x,y,0) -> 0 + return ConstantInt::get(CI->getType(), 0); + + if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD); + + std::string Str1, Str2; + bool HasStr1 = GetConstantStringInfo(Str1P, Str1); + bool HasStr2 = GetConstantStringInfo(Str2P, Str2); + + // strncmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) { + StringRef SubStr1 = StringRef(Str1).substr(0, Length); + StringRef SubStr2 = StringRef(Str2).substr(0, Length); + return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); + } + + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + return 0; + } +}; + + +//===---------------------------------------===// +// 'strcpy' Optimizations + +struct StrCpyOpt : public LibCallOptimization { + bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall. + + StrCpyOpt(bool c) : OptChkCall(c) {} + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcpy" function prototype. + unsigned NumParams = OptChkCall ? 
3 : 2; + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != NumParams || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // strcpy(x,x) -> x + return Src; + + // These optimizations require TargetData. + if (!TD) return 0; + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // We have enough information to now generate the memcpy call to do the + // concatenation for us. Make a memcpy to copy the nul byte with align = 1. + if (OptChkCall) + EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), + CI->getArgOperand(2), B, TD); + else + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + return Dst; + } +}; + +//===---------------------------------------===// +// 'strncpy' Optimizations + +struct StrNCpyOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); + + // See if we can get the length of the input string. 
+ uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; + + if (SrcLen == 0) { + // strncpy(x, "", y) -> memset(x, '\0', y, 1) + B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); + return Dst; + } + + uint64_t Len; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) + Len = LengthArg->getZExtValue(); + else + return 0; + + if (Len == 0) return Dst; // strncpy(x, y, 0) -> x + + // These optimizations require TargetData. + if (!TD) return 0; + + // Let strncpy handle the zero padding + if (Len > SrcLen+1) return 0; + + // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + + return Dst; + } +}; + +//===---------------------------------------===// +// 'strlen' Optimizations + +struct StrLenOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + Value *Src = CI->getArgOperand(0); + + // Constant folding: strlen("xyz") -> 3 + if (uint64_t Len = GetStringLength(Src)) + return ConstantInt::get(CI->getType(), Len-1); + + // strlen(x) != 0 --> *x != 0 + // strlen(x) == 0 --> *x == 0 + if (IsOnlyUsedInZeroEqualityComparison(CI)) + return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + return 0; + } +}; + + +//===---------------------------------------===// +// 'strpbrk' Optimizations + +struct StrPBrkOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + FT->getReturnType() != FT->getParamType(0)) + return 0; + + std::string S1, S2; + bool HasS1 = 
GetConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2); + + // strpbrk(s, "") -> NULL + // strpbrk("", s) -> NULL + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t I = S1.find_first_of(S2); + if (I == std::string::npos) // No match. + return Constant::getNullValue(CI->getType()); + + return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); + } + + // strpbrk(s, "a") -> strchr(s, 'a') + if (TD && HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD); + + return 0; + } +}; + +//===---------------------------------------===// +// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc. + +struct StrToOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy()) + return 0; + + Value *EndPtr = CI->getArgOperand(1); + if (isa<ConstantPointerNull>(EndPtr)) { + // With a null EndPtr, this function won't capture the main argument. + // It would be readonly too, except that it still may write to errno. 
+ CI->addAttribute(1, Attribute::NoCapture); + } + + return 0; + } +}; + +//===---------------------------------------===// +// 'strspn' Optimizations + +struct StrSpnOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + std::string S1, S2; + bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2); + + // strspn(s, "") -> 0 + // strspn("", s) -> 0 + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) + return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str())); + + return 0; + } +}; + +//===---------------------------------------===// +// 'strcspn' Optimizations + +struct StrCSpnOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + std::string S1, S2; + bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. 
+ if (HasS1 && HasS2) + return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str())); + + // strcspn(s, "") -> strlen(s) + if (TD && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, TD); + + return 0; + } +}; + +//===---------------------------------------===// +// 'strstr' Optimizations + +struct StrStrOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isPointerTy()) + return 0; + + // fold strstr(x, x) -> x. + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD); + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD); + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(*UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + Old->replaceAllUsesWith(Cmp); + Old->eraseFromParent(); + } + return CI; + } + + // See if either input string is a constant string. + std::string SearchStr, ToFindStr; + bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // If both strings are known, constant fold it. 
+ if (HasStr1 && HasStr2) { + std::string::size_type Offset = SearchStr.find(ToFindStr); + + if (Offset == std::string::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = CastToCStr(CI->getArgOperand(0), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) + return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), + ToFindStr[0], B, TD), CI->getType()); + return 0; + } +}; + + +//===---------------------------------------===// +// 'memcmp' Optimizations + +struct MemCmpOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy(32)) + return 0; + + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Make sure we have a constant length. 
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!LenC) return 0; + uint64_t Len = LenC->getZExtValue(); + + if (Len == 0) // memcmp(s1,s2,0) -> 0 + return Constant::getNullValue(CI->getType()); + + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); + } + + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + std::string LHSStr, RHSStr; + if (GetConstantStringInfo(LHS, LHSStr) && + GetConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. + if (Len > LHSStr.length() || Len > RHSStr.length()) + return 0; + uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len); + return ConstantInt::get(CI->getType(), Ret); + } + + return 0; + } +}; + +//===---------------------------------------===// +// 'memcpy' Optimizations + +struct MemCpyOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +//===---------------------------------------===// +// 'memmove' Optimizations + +struct MemMoveOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. 
+ if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +//===---------------------------------------===// +// 'memset' Optimizations + +struct MemSetOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memset(p, v, n) -> llvm.memset(p, v, n, 1) + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +//===----------------------------------------------------------------------===// +// Math Library Optimizations +//===----------------------------------------------------------------------===// + +//===---------------------------------------===// +// 'pow*' Optimizations + +struct PowOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. 
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return 0; + + Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 + return Op1C; + if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x) + return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + } + + ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); + if (Op2C == 0) return 0; + + if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 + return ConstantFP::get(CI->getType(), 1.0); + + if (Op2C->isExactlyValue(0.5)) { + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). + // This is faster than calling pow, and still handles negative zero + // and negative infinite correctly. + // TODO: In fast-math mode, this could be just sqrt(x). + // TODO: In finite-only mode, this could be just fabs(sqrt(x)). 
+ Value *Inf = ConstantFP::getInfinity(CI->getType()); + Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); + Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, + Callee->getAttributes()); + Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B, + Callee->getAttributes()); + Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); + Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); + return Sel; + } + + if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x + return Op1; + if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x + return B.CreateFMul(Op1, Op1, "pow2"); + if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x + return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), + Op1, "powrecip"); + return 0; + } +}; + +//===---------------------------------------===// +// 'exp2' Optimizations + +struct Exp2Opt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 1 argument of FP type, which matches the + // result type. 
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return 0; + + Value *Op = CI->getArgOperand(0); + // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 + // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 + Value *LdExpArg = 0; + if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) + LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); + } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) + LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); + } + + if (LdExpArg) { + const char *Name; + if (Op->getType()->isFloatTy()) + Name = "ldexpf"; + else if (Op->getType()->isDoubleTy()) + Name = "ldexp"; + else + Name = "ldexpl"; + + Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); + if (!Op->getType()->isFloatTy()) + One = ConstantExpr::getFPExtend(One, Op->getType()); + + Module *M = Caller->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), + B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; + } + return 0; + } +}; + +//===---------------------------------------===// +// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' + +struct UnaryDoubleFPOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) + return 0; + + // If this is something like 'floor((double)floatval)', convert to floorf. 
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) + return 0; + + // floor((double)floatval) -> (double)floorf(floatval) + Value *V = Cast->getOperand(0); + V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B, + Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; + +//===----------------------------------------------------------------------===// +// Integer Optimizations +//===----------------------------------------------------------------------===// + +//===---------------------------------------===// +// 'ffs*' Optimizations + +struct FFSOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. + if (FT->getNumParams() != 1 || + !FT->getReturnType()->isIntegerTy(32) || + !FT->getParamType(0)->isIntegerTy()) + return 0; + + Value *Op = CI->getArgOperand(0); + + // Constant fold. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + if (CI->getValue() == 0) // ffs(0) -> 0. + return Constant::getNullValue(CI->getType()); + // ffs(c) -> cttz(c)+1 + return B.getInt32(CI->getValue().countTrailingZeros() + 1); + } + + // ffs(x) -> x != 0 ? 
(i32)llvm.cttz(x)+1 : 0 + Type *ArgType = Op->getType(); + Value *F = Intrinsic::getDeclaration(Callee->getParent(), + Intrinsic::cttz, ArgType); + Value *V = B.CreateCall(F, Op, "cttz"); + V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); + V = B.CreateIntCast(V, B.getInt32Ty(), false); + + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); + return B.CreateSelect(Cond, V, B.getInt32(0)); + } +}; + +//===---------------------------------------===// +// 'isdigit' Optimizations + +struct IsDigitOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(i32) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // isdigit(c) -> (c-'0') <u 10 + Value *Op = CI->getArgOperand(0); + Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp"); + Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit"); + return B.CreateZExt(Op, CI->getType()); + } +}; + +//===---------------------------------------===// +// 'isascii' Optimizations + +struct IsAsciiOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(i32) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // isascii(c) -> c <u 128 + Value *Op = CI->getArgOperand(0); + Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii"); + return B.CreateZExt(Op, CI->getType()); + } +}; + +//===---------------------------------------===// +// 'abs', 'labs', 'llabs' Optimizations + +struct AbsOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(integer) where the types agree. 
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + FT->getParamType(0) != FT->getReturnType()) + return 0; + + // abs(x) -> x >s -1 ? x : -x + Value *Op = CI->getArgOperand(0); + Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), + "ispos"); + Value *Neg = B.CreateNeg(Op, "neg"); + return B.CreateSelect(Pos, Op, Neg); + } +}; + + +//===---------------------------------------===// +// 'toascii' Optimizations + +struct ToAsciiOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require i32(i32) + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // isascii(c) -> c & 0x7f + return B.CreateAnd(CI->getArgOperand(0), + ConstantInt::get(CI->getType(),0x7F)); + } +}; + +//===----------------------------------------------------------------------===// +// Formatting and IO Optimizations +//===----------------------------------------------------------------------===// + +//===---------------------------------------===// +// 'printf' Optimizations + +struct PrintFOpt : public LibCallOptimization { + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // Check for a fixed format string. + std::string FormatStr; + if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr)) + return 0; + + // Empty format string -> noop. + if (FormatStr.empty()) // Tolerate printf's declared void. + return CI->use_empty() ? (Value*)CI : + ConstantInt::get(CI->getType(), 0); + + // Do not do any of the following transformations if the printf return value + // is used, in general the printf return value is not compatible with either + // putchar() or puts(). + if (!CI->use_empty()) + return 0; + + // printf("x") -> putchar('x'), even for '%'. 
+ if (FormatStr.size() == 1) { + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD); + if (CI->use_empty()) return CI; + return B.CreateIntCast(Res, CI->getType(), true); + } + + // printf("foo\n") --> puts("foo") + if (FormatStr[FormatStr.size()-1] == '\n' && + FormatStr.find('%') == std::string::npos) { // no format characters. + // Create a string literal with no \n on it. We expect the constant merge + // pass to be run after this pass, to merge duplicate strings. + FormatStr.erase(FormatStr.end()-1); + Constant *C = ConstantArray::get(*Context, FormatStr, true); + C = new GlobalVariable(*Callee->getParent(), C->getType(), true, + GlobalVariable::InternalLinkage, C, "str"); + EmitPutS(C, B, TD); + return CI->use_empty() ? (Value*)CI : + ConstantInt::get(CI->getType(), FormatStr.size()+1); + } + + // Optimize specific format strings. + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); + + if (CI->use_empty()) return CI; + return B.CreateIntCast(Res, CI->getType(), true); + } + + // printf("%s\n", str) --> puts(str) + if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy()) { + EmitPutS(CI->getArgOperand(1), B, TD); + return CI; + } + return 0; + } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. 
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *IPrintFFn = + M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +//===---------------------------------------===// +// 'sprintf' Optimizations + +struct SPrintFOpt : public LibCallOptimization { + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // Check for a fixed format string. + std::string FormatStr; + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // If we just have a format string (nothing else crazy) transform it. + if (CI->getNumArgOperands() == 2) { + // Make sure there's no % in the constant array. We could try to handle + // %% -> % in the future if we cared. + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') + return 0; // we found a format specifier, bail out. + + // These optimizations require TargetData. + if (!TD) return 0; + + // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), // Copy the + FormatStr.size() + 1), 1); // nul byte. + return ConstantInt::get(CI->getType(), FormatStr.size()); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. 
+ if (FormatStr[1] == 'c') { + // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); + Value *Ptr = CastToCStr(CI->getArgOperand(0), B); + B.CreateStore(V, Ptr); + Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); + + return ConstantInt::get(CI->getType(), 1); + } + + if (FormatStr[1] == 's') { + // These optimizations require TargetData. + if (!TD) return 0; + + // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; + + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); + Value *IncLen = B.CreateAdd(Len, + ConstantInt::get(Len->getType(), 1), + "leninc"); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); + + // The sprintf result is the unincremented number of bytes in the string. + return B.CreateIntCast(Len, CI->getType(), false); + } + return 0; + } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed pointer arguments and an integer result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. 
+ if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +//===---------------------------------------===// +// 'fwrite' Optimizations + +struct FWriteOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require a pointer, an integer, an integer, a pointer, returning integer. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + !FT->getParamType(2)->isIntegerTy() || + !FT->getParamType(3)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + // Get the element size and count. + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeC || !CountC) return 0; + uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); + + // If this is writing zero records, remove the call (it's a noop). + if (Bytes == 0) + return ConstantInt::get(CI->getType(), 0); + + // If this is writing one byte, turn it into fputc. + if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) + Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); + EmitFPutC(Char, CI->getArgOperand(3), B, TD); + return ConstantInt::get(CI->getType(), 1); + } + + return 0; + } +}; + +//===---------------------------------------===// +// 'fputs' Optimizations + +struct FPutsOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. + if (!TD) return 0; + + // Require two pointers. 
Also, we can't optimize if return value is used. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !CI->use_empty()) + return 0; + + // fputs(s,F) --> fwrite(s,1,strlen(s),F) + uint64_t Len = GetStringLength(CI->getArgOperand(0)); + if (!Len) return 0; + EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD); + return CI; // Known to have no uses (see above). + } +}; + +//===---------------------------------------===// +// 'fprintf' Optimizations + +struct FPrintFOpt : public LibCallOptimization { + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // All the optimizations depend on the format string. + std::string FormatStr; + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) + if (CI->getNumArgOperands() == 2) { + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') // Could handle %% -> % if we cared. + return 0; // We found a format specifier. + + // These optimizations require TargetData. + if (!TD) return 0; + + EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD); + return ConstantInt::get(CI->getType(), FormatStr.size()); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. 
+    if (FormatStr[1] == 'c') {
+      // fprintf(F, "%c", chr) --> fputc(chr, F)
+      if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+      EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+      return ConstantInt::get(CI->getType(), 1);
+    }
+
+    if (FormatStr[1] == 's') {
+      // fprintf(F, "%s", str) --> fputs(str, F)
+      if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+        return 0;
+      EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+      return CI;
+    }
+    return 0;
+  }
+
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed parameters as pointers and integer result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+    // floating point arguments.
+    if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *FIPrintFFn =
+        M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(FIPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
+};
+
+//===---------------------------------------===//
+// 'puts' Optimizations
+
+/// PutsOpt - Rewrites puts("") into putchar('\n') when the result of the
+/// puts call is unused.  Every other puts call is left alone.
+struct PutsOpt : public LibCallOptimization {
+  /// Returns CI itself when the call was rewritten (the replacement has been
+  /// emitted through B and CI has no uses to patch up), or 0 when nothing was
+  /// done.
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require one fixed pointer argument and an integer/void result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+        !(FT->getReturnType()->isIntegerTy() ||
+          FT->getReturnType()->isVoidTy()))
+      return 0;
+
+    // Check for a constant string.
+    std::string Str;
+    if (!GetConstantStringInfo(CI->getArgOperand(0), Str))
+      return 0;
+
+    // puts("") -> putchar('\n')
+    // The rewrite is only attempted when the result of puts is unused, so the
+    // value produced by putchar never has to be cast to the type of CI and
+    // the call itself can be returned as the "no replacement value" marker.
+    if (Str.empty() && CI->use_empty()) {
+      EmitPutChar(B.getInt32('\n'), B, TD);
+      return CI;
+    }
+
+    return 0;
+  }
+};
+
+} // end anonymous namespace.
+
+//===----------------------------------------------------------------------===//
+// SimplifyLibCalls Pass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// This pass optimizes well known library functions from libc and libm.
+  ///
+  class SimplifyLibCalls : public FunctionPass {
+    TargetLibraryInfo *TLI;
+
+    StringMap<LibCallOptimization*> Optimizations;
+    // String and Memory LibCall Optimizations
+    StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
+    StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+    StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk;
+    StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
+    MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
+    // Math Library Optimizations
+    PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+    // Integer Optimizations
+    FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
+    ToAsciiOpt ToAscii;
+    // Formatting and IO Optimizations
+    SPrintFOpt SPrintF; PrintFOpt PrintF;
+    FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+    PutsOpt Puts;
+
+    bool Modified; // This is only used by doInitialization.
+ public: + static char ID; // Pass identification + SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) { + initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); + } + void InitOptimizations(); + bool runOnFunction(Function &F); + + void setDoesNotAccessMemory(Function &F); + void setOnlyReadsMemory(Function &F); + void setDoesNotThrow(Function &F); + void setDoesNotCapture(Function &F, unsigned n); + void setDoesNotAlias(Function &F, unsigned n); + bool doInitialization(Module &M); + + void inferPrototypeAttributes(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfo>(); + } + }; +} // end anonymous namespace. + +char SimplifyLibCalls::ID = 0; + +INITIALIZE_PASS_BEGIN(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false) + +// Public interface to the Simplify LibCalls pass. +FunctionPass *llvm::createSimplifyLibCallsPass() { + return new SimplifyLibCalls(); +} + +/// Optimizations - Populate the Optimizations map with all the optimizations +/// we know. 
+void SimplifyLibCalls::InitOptimizations() { + // String and Memory LibCall Optimizations + Optimizations["strcat"] = &StrCat; + Optimizations["strncat"] = &StrNCat; + Optimizations["strchr"] = &StrChr; + Optimizations["strrchr"] = &StrRChr; + Optimizations["strcmp"] = &StrCmp; + Optimizations["strncmp"] = &StrNCmp; + Optimizations["strcpy"] = &StrCpy; + Optimizations["strncpy"] = &StrNCpy; + Optimizations["strlen"] = &StrLen; + Optimizations["strpbrk"] = &StrPBrk; + Optimizations["strtol"] = &StrTo; + Optimizations["strtod"] = &StrTo; + Optimizations["strtof"] = &StrTo; + Optimizations["strtoul"] = &StrTo; + Optimizations["strtoll"] = &StrTo; + Optimizations["strtold"] = &StrTo; + Optimizations["strtoull"] = &StrTo; + Optimizations["strspn"] = &StrSpn; + Optimizations["strcspn"] = &StrCSpn; + Optimizations["strstr"] = &StrStr; + Optimizations["memcmp"] = &MemCmp; + if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy; + Optimizations["memmove"] = &MemMove; + if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet; + + // _chk variants of String and Memory LibCall Optimizations. 
+ Optimizations["__strcpy_chk"] = &StrCpyChk; + + // Math Library Optimizations + Optimizations["powf"] = &Pow; + Optimizations["pow"] = &Pow; + Optimizations["powl"] = &Pow; + Optimizations["llvm.pow.f32"] = &Pow; + Optimizations["llvm.pow.f64"] = &Pow; + Optimizations["llvm.pow.f80"] = &Pow; + Optimizations["llvm.pow.f128"] = &Pow; + Optimizations["llvm.pow.ppcf128"] = &Pow; + Optimizations["exp2l"] = &Exp2; + Optimizations["exp2"] = &Exp2; + Optimizations["exp2f"] = &Exp2; + Optimizations["llvm.exp2.ppcf128"] = &Exp2; + Optimizations["llvm.exp2.f128"] = &Exp2; + Optimizations["llvm.exp2.f80"] = &Exp2; + Optimizations["llvm.exp2.f64"] = &Exp2; + Optimizations["llvm.exp2.f32"] = &Exp2; + +#ifdef HAVE_FLOORF + Optimizations["floor"] = &UnaryDoubleFP; +#endif +#ifdef HAVE_CEILF + Optimizations["ceil"] = &UnaryDoubleFP; +#endif +#ifdef HAVE_ROUNDF + Optimizations["round"] = &UnaryDoubleFP; +#endif +#ifdef HAVE_RINTF + Optimizations["rint"] = &UnaryDoubleFP; +#endif +#ifdef HAVE_NEARBYINTF + Optimizations["nearbyint"] = &UnaryDoubleFP; +#endif + + // Integer Optimizations + Optimizations["ffs"] = &FFS; + Optimizations["ffsl"] = &FFS; + Optimizations["ffsll"] = &FFS; + Optimizations["abs"] = &Abs; + Optimizations["labs"] = &Abs; + Optimizations["llabs"] = &Abs; + Optimizations["isdigit"] = &IsDigit; + Optimizations["isascii"] = &IsAscii; + Optimizations["toascii"] = &ToAscii; + + // Formatting and IO Optimizations + Optimizations["sprintf"] = &SPrintF; + Optimizations["printf"] = &PrintF; + Optimizations["fwrite"] = &FWrite; + Optimizations["fputs"] = &FPuts; + Optimizations["fprintf"] = &FPrintF; + Optimizations["puts"] = &Puts; +} + + +/// runOnFunction - Top level algorithm. 
+///
+bool SimplifyLibCalls::runOnFunction(Function &F) {
+  // The library info is a required analysis; the target data is used only if
+  // some other pass has made it available.
+  TLI = &getAnalysis<TargetLibraryInfo>();
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+  // The name -> optimizer table is filled in lazily, the first time through.
+  if (Optimizations.empty())
+    InitOptimizations();
+
+  IRBuilder<> Builder(F.getContext());
+  bool Changed = false;
+
+  for (Function::iterator BB = F.begin(), BBEnd = F.end(); BB != BBEnd; ++BB) {
+    BasicBlock::iterator Next = BB->begin(), InstEnd = BB->end();
+    while (Next != InstEnd) {
+      // Advance past the current instruction before looking at it, because
+      // it may be erased further down.
+      CallInst *Call = dyn_cast<CallInst>(Next++);
+      if (!Call)
+        continue;
+
+      // Only direct calls to external (or dllimport) declarations qualify.
+      Function *Callee = Call->getCalledFunction();
+      if (!Callee || !Callee->isDeclaration())
+        continue;
+      if (!Callee->hasExternalLinkage() && !Callee->hasDLLImportLinkage())
+        continue;
+
+      // Skip callees that have no registered optimizer.
+      LibCallOptimization *Optimizer = Optimizations.lookup(Callee->getName());
+      if (!Optimizer)
+        continue;
+
+      // New instructions are inserted right after the call and carry the
+      // call's debug location.
+      Builder.SetInsertPoint(BB, Next);
+      Builder.SetCurrentDebugLocation(Call->getDebugLoc());
+
+      Value *Replacement = Optimizer->OptimizeCall(Call, TD, TLI, Builder);
+      if (!Replacement)
+        continue;
+
+      DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *Call;
+            dbgs() << " into: " << *Replacement << "\n");
+
+      Changed = true;
+      ++NumSimplified;
+
+      // Resume with whatever now follows the call; that may be an
+      // instruction the optimizer has just inserted.
+      Next = Call;
+      ++Next;
+
+      // An optimizer may hand back the call itself when there is no value to
+      // substitute; otherwise forward the uses (and the name, if the new
+      // value has none) before dropping the call.
+      if (Replacement != Call && !Call->use_empty()) {
+        Call->replaceAllUsesWith(Replacement);
+        if (!Replacement->hasName())
+          Replacement->takeName(Call);
+      }
+      Call->eraseFromParent();
+    }
+  }
+  return Changed;
+}
+
+// Utility methods for doInitialization.
+ +void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) { + if (!F.doesNotAccessMemory()) { + F.setDoesNotAccessMemory(); + ++NumAnnotated; + Modified = true; + } +} +void SimplifyLibCalls::setOnlyReadsMemory(Function &F) { + if (!F.onlyReadsMemory()) { + F.setOnlyReadsMemory(); + ++NumAnnotated; + Modified = true; + } +} +void SimplifyLibCalls::setDoesNotThrow(Function &F) { + if (!F.doesNotThrow()) { + F.setDoesNotThrow(); + ++NumAnnotated; + Modified = true; + } +} +void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) { + if (!F.doesNotCapture(n)) { + F.setDoesNotCapture(n); + ++NumAnnotated; + Modified = true; + } +} +void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) { + if (!F.doesNotAlias(n)) { + F.setDoesNotAlias(n); + ++NumAnnotated; + Modified = true; + } +} + + +void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { + FunctionType *FTy = F.getFunctionType(); + + StringRef Name = F.getName(); + switch (Name[0]) { + case 's': + if (Name == "strlen") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "strchr" || + Name == "strrchr") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + } else if (Name == "strcpy" || + Name == "stpcpy" || + Name == "strcat" || + Name == "strtol" || + Name == "strtod" || + Name == "strtof" || + Name == "strtoul" || + Name == "strtoll" || + Name == "strtold" || + Name == "strncat" || + Name == "strncpy" || + Name == "strtoull") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "strxfrm") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + 
setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "strcmp" || + Name == "strspn" || + Name == "strncmp" || + Name == "strcspn" || + Name == "strcoll" || + Name == "strcasecmp" || + Name == "strncasecmp") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "strstr" || + Name == "strpbrk") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "strtok" || + Name == "strtok_r") { + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "scanf" || + Name == "setbuf" || + Name == "setvbuf") { + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "strdup" || + Name == "strndup") { + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + } else if (Name == "stat" || + Name == "sscanf" || + Name == "sprintf" || + Name == "statvfs") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "snprintf") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + } else if (Name == "setitimer") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + 
!FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + setDoesNotCapture(F, 3); + } else if (Name == "system") { + if (FTy->getNumParams() != 1 || + !FTy->getParamType(0)->isPointerTy()) + return; + // May throw; "system" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + } + break; + case 'm': + if (Name == "malloc") { + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "memcmp") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "memchr" || + Name == "memrchr") { + if (FTy->getNumParams() != 3) + return; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + } else if (Name == "modf" || + Name == "modff" || + Name == "modfl" || + Name == "memcpy" || + Name == "memccpy" || + Name == "memmove") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "memalign") { + if (!FTy->getReturnType()->isPointerTy()) + return; + setDoesNotAlias(F, 0); + } else if (Name == "mkdir" || + Name == "mktime") { + if (FTy->getNumParams() == 0 || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'r': + if (Name == "realloc") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + } else if (Name == "read") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return; + // May throw; "read" is a valid pthread cancellation point. 
+ setDoesNotCapture(F, 2); + } else if (Name == "rmdir" || + Name == "rewind" || + Name == "remove" || + Name == "realpath") { + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "rename" || + Name == "readlink") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } + break; + case 'w': + if (Name == "write") { + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return; + // May throw; "write" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + } + break; + case 'b': + if (Name == "bcopy") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "bcmp") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "bzero") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'c': + if (Name == "calloc") { + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "chmod" || + Name == "chown" || + Name == "ctermid" || + Name == "clearerr" || + Name == "closedir") { + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'a': + if (Name == "atoi" || + Name == "atol" || + Name == "atof" || + Name == "atoll") { + if (FTy->getNumParams() != 1 || 
!FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + } else if (Name == "access") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'f': + if (Name == "fopen") { + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "fdopen") { + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 2); + } else if (Name == "feof" || + Name == "free" || + Name == "fseek" || + Name == "ftell" || + Name == "fgetc" || + Name == "fseeko" || + Name == "ftello" || + Name == "fileno" || + Name == "fflush" || + Name == "fclose" || + Name == "fsetpos" || + Name == "flockfile" || + Name == "funlockfile" || + Name == "ftrylockfile") { + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "ferror") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setOnlyReadsMemory(F); + } else if (Name == "fputc" || + Name == "fstat" || + Name == "frexp" || + Name == "frexpf" || + Name == "frexpl" || + Name == "fstatvfs") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "fgets") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 3); + } else if (Name == "fread" || + 
Name == "fwrite") { + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 4); + } else if (Name == "fputs" || + Name == "fscanf" || + Name == "fprintf" || + Name == "fgetpos") { + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } + break; + case 'g': + if (Name == "getc" || + Name == "getlogin_r" || + Name == "getc_unlocked") { + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "getenv") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + } else if (Name == "gets" || + Name == "getchar") { + setDoesNotThrow(F); + } else if (Name == "getitimer") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "getpwnam") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'u': + if (Name == "ungetc") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "uname" || + Name == "unlink" || + Name == "unsetenv") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "utime" || + Name == "utimes") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 
2); + } + break; + case 'p': + if (Name == "putc") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + } else if (Name == "puts" || + Name == "printf" || + Name == "perror") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "pread" || + Name == "pwrite") { + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return; + // May throw; these are valid pthread cancellation points. + setDoesNotCapture(F, 2); + } else if (Name == "putchar") { + setDoesNotThrow(F); + } else if (Name == "popen") { + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "pclose") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'v': + if (Name == "vscanf") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "vsscanf" || + Name == "vfscanf") { + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "valloc") { + if (!FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "vprintf") { + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } else if (Name == "vfprintf" || + Name == "vsprintf") { + if (FTy->getNumParams() != 3 || + 
!FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "vsnprintf") { + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + } + break; + case 'o': + if (Name == "open") { + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + } else if (Name == "opendir") { + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + } + break; + case 't': + if (Name == "tmpfile") { + if (!FTy->getReturnType()->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + } else if (Name == "times") { + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'h': + if (Name == "htonl" || + Name == "htons") { + setDoesNotThrow(F); + setDoesNotAccessMemory(F); + } + break; + case 'n': + if (Name == "ntohl" || + Name == "ntohs") { + setDoesNotThrow(F); + setDoesNotAccessMemory(F); + } + break; + case 'l': + if (Name == "lstat") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + } else if (Name == "lchown") { + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + } + break; + case 'q': + if (Name == "qsort") { + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return; + // May throw; places call 
through function pointer.
+      setDoesNotCapture(F, 4);
+    }
+    break;
+  case '_':
+    if (Name == "__strdup" ||
+        Name == "__strndup") {
+      if (FTy->getNumParams() < 1 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "__strtok_r") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "_IO_getc") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "_IO_putc") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 1:
+    // Names in this group start with the byte '\1'.
+    if (Name == "\1__isoc99_scanf") {
+      if (FTy->getNumParams() < 1 ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "\1stat64" ||
+               Name == "\1lstat64" ||
+               Name == "\1statvfs64" ||
+               Name == "\1__isoc99_sscanf") {
+      // Parameters 0 and 1 are both inspected below, so the prototype must
+      // declare at least two of them before getParamType(1) may be called.
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "\1fopen64") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "\1fseeko64" ||
+               Name == "\1ftello64") {
+      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "\1tmpfile64") {
+      if (!FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "\1fstat64" ||
+               Name == "\1fstatvfs64") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "\1open64") {
+      if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      // May throw; "open" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  }
+}
+
+/// doInitialization - Add attributes to well-known functions.
+///
+/// Runs inferPrototypeAttributes on every named declaration in the module and
+/// returns true if any attribute was added (tracked through Modified, which
+/// the set* helpers raise).
+bool SimplifyLibCalls::doInitialization(Module &M) {
+  Modified = false;
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    Function &F = *I;
+    // inferPrototypeAttributes switches on the first character of the name,
+    // so unnamed functions must be filtered out here.
+    if (F.isDeclaration() && F.hasName())
+      inferPrototypeAttributes(F);
+  }
+  return Modified;
+}
+
+// TODO:
+//   Additional cases that we need to add to this file:
+//
+// cbrt:
+//   * cbrt(expN(X))  -> expN(x/3)
+//   * cbrt(sqrt(x))  -> pow(x,1/6)
+//   * cbrt(cbrt(x))  -> pow(x,1/9)
+//
+// cos, cosf, cosl:
+//   * cos(-x)  -> cos(x)
+//
+// exp, expf, expl:
+//   * exp(log(x))  -> x
+//
+// log, logf, logl:
+//   * log(exp(x))   -> x
+//   * log(x**y)     -> y*log(x)
+//   * log(exp(y))   -> y*log(e)
+//   * log(exp2(y))  -> y*log(2)
+//   * log(exp10(y)) -> y*log(10)
+//   * log(sqrt(x))  -> 0.5*log(x)
+//   * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+//   * lround(cnst) -> cnst'
+//
+// pow, powf, powl:
+//   * pow(exp(x),y)  -> exp(x*y)
+//   * pow(sqrt(x),y) -> pow(x,y*0.5)
+//   * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// round, roundf, roundl:
+//   * round(cnst) -> cnst'
+//
+// signbit:
+//   * signbit(cnst) -> cnst'
+//   * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+//   * sqrt(expN(x))  -> expN(x*0.5)
+//   * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+//   * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// stpcpy:
+//   * stpcpy(str, "literal") ->
+//           llvm.memcpy(str,"literal",strlen("literal")+1,1)
+//
+// tan, tanf, tanl:
+//   * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+//   * trunc(cnst) -> cnst'
+//
+//
diff --git a/src/LLVM/lib/Transforms/Scalar/Sink.cpp b/src/LLVM/lib/Transforms/Scalar/Sink.cpp new file mode 100644 index 0000000..c83f56c --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/Sink.cpp
@@ -0,0 +1,273 @@ +//===-- Sink.cpp - Code Sinking -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions into successor blocks, when possible, so that +// they aren't executed on paths where their results aren't needed. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sink" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumSunk, "Number of instructions sunk"); + +namespace { + class Sinking : public FunctionPass { + DominatorTree *DT; + LoopInfo *LI; + AliasAnalysis *AA; + + public: + static char ID; // Pass identification + Sinking() : FunctionPass(ID) { + initializeSinkingPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<DominatorTree>(); + AU.addPreserved<LoopInfo>(); + } + private: + bool ProcessBlock(BasicBlock &BB); + bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores); + bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const; + }; +} // end anonymous namespace + +char Sinking::ID = 0; +INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false) + +FunctionPass *llvm::createSinkingPass() { return new Sinking(); } + +/// AllUsesDominatedByBlock - Return true if all uses of the specified value +/// occur in blocks dominated by the specified block. +bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, + BasicBlock *BB) const { + // Ignoring debug uses is necessary so debug info doesn't affect the code. + // This may leave a referencing dbg_value in the original block, before + // the definition of the vreg. Dwarf generator handles this although the + // user might not get the right info at runtime. + for (Value::use_iterator I = Inst->use_begin(), + E = Inst->use_end(); I != E; ++I) { + // Determine the block of the use. + Instruction *UseInst = cast<Instruction>(*I); + BasicBlock *UseBlock = UseInst->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(UseInst)) { + // PHI nodes use the operand in the predecessor block, not the block with + // the PHI. + unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo()); + UseBlock = PN->getIncomingBlock(Num); + } + // Check that it dominates. + if (!DT->dominates(BB, UseBlock)) + return false; + } + return true; +} + +bool Sinking::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTree>(); + LI = &getAnalysis<LoopInfo>(); + AA = &getAnalysis<AliasAnalysis>(); + + bool EverMadeChange = false; + + while (1) { + bool MadeChange = false; + + // Process all basic blocks. + for (Function::iterator I = F.begin(), E = F.end(); + I != E; ++I) + MadeChange |= ProcessBlock(*I); + + // If this iteration over the code changed anything, keep iterating. + if (!MadeChange) break; + EverMadeChange = true; + } + return EverMadeChange; +} + +bool Sinking::ProcessBlock(BasicBlock &BB) { + // Can't sink anything out of a block that has fewer than two successors.
+ if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false; + + // Don't bother sinking code out of unreachable blocks. In addition to being + // unprofitable, it can also lead to infinite looping, because in an unreachable + // loop there may be nowhere to stop. + if (!DT->isReachableFromEntry(&BB)) return false; + + bool MadeChange = false; + + // Walk the basic block bottom-up. Remember if we saw a store. + BasicBlock::iterator I = BB.end(); + --I; + bool ProcessedBegin = false; + SmallPtrSet<Instruction *, 8> Stores; + do { + Instruction *Inst = I; // The instruction to sink. + + // Predecrement I (if it's not begin) so that it isn't invalidated by + // sinking. + ProcessedBegin = I == BB.begin(); + if (!ProcessedBegin) + --I; + + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + + if (SinkInstruction(Inst, Stores)) + ++NumSunk, MadeChange = true; + + // If we just processed the first instruction in the block, we're done. + } while (!ProcessedBegin); + + return MadeChange; +} + +static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, + SmallPtrSet<Instruction *, 8> &Stores) { + + if (Inst->mayWriteToMemory()) { + Stores.insert(Inst); + return false; + } + + if (LoadInst *L = dyn_cast<LoadInst>(Inst)) { + AliasAnalysis::Location Loc = AA->getLocation(L); + for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(), + E = Stores.end(); I != E; ++I) + if (AA->getModRefInfo(*I, Loc) & AliasAnalysis::Mod) + return false; + } + + if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst)) + return false; + + return true; +} + +/// SinkInstruction - Determine whether it is safe to sink the specified +/// instruction out of its current block into a successor and, if so, move it. +bool Sinking::SinkInstruction(Instruction *Inst, + SmallPtrSet<Instruction *, 8> &Stores) { + // Check if it's safe to move the instruction.
+ if (!isSafeToMove(Inst, AA, Stores)) + return false; + + // FIXME: This should include support for sinking instructions within the + // block they are currently in to shorten the live ranges. We often get + // instructions sunk into the top of a large block, but it would be better to + // also sink them down before their first use in the block. This xform has to + // be careful not to *increase* register pressure though, e.g. sinking + // "x = y + z" down if it kills y and z would increase the live ranges of y + // and z and only shrink the live range of x. + + // Remember the block the instruction currently lives in; its successors are + // the candidate sink targets examined below. + BasicBlock *ParentBlock = Inst->getParent(); + + // SuccToSinkTo - This is the successor to sink this instruction to, once we + // decide. + BasicBlock *SuccToSinkTo = 0; + + // FIXME: This picks a successor to sink into based on having one + // successor that dominates all the uses. However, there are cases where + // sinking can happen but where the sink point isn't a successor. For + // example: + // x = computation + // if () {} else {} + // use x + // the instruction could be sunk over the whole diamond for the + // if/then/else (or loop, etc), allowing it to be sunk into other blocks + // after that. + + // Instructions can only be sunk if all their uses are in blocks + // dominated by one of the successors. + // Look at all the successors and decide which one + // we should sink to. + for (succ_iterator SI = succ_begin(ParentBlock), + E = succ_end(ParentBlock); SI != E; ++SI) { + if (AllUsesDominatedByBlock(Inst, *SI)) { + SuccToSinkTo = *SI; + break; + } + } + + // If we couldn't find a block to sink to, ignore this instruction. + if (SuccToSinkTo == 0) + return false; + + // It is not possible to sink an instruction into its own block. This can + // happen with loops.
+ if (Inst->getParent() == SuccToSinkTo) + return false; + + DEBUG(dbgs() << "Sink instr " << *Inst); + DEBUG(dbgs() << "to block "; + WriteAsOperand(dbgs(), SuccToSinkTo, false)); + + // If the block has multiple predecessors, this would introduce computation on + // a path where it doesn't already exist. We could split the critical edge, + // but for now we just punt. + // FIXME: Split critical edges if not backedges. + if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) { + // We cannot sink a load across a critical edge - there may be stores in + // other code paths. + if (!Inst->isSafeToSpeculativelyExecute()) { + DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); + return false; + } + + // We don't want to sink across a critical edge if we don't dominate the + // successor. We could be introducing calculations to new code paths. + if (!DT->dominates(ParentBlock, SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); + return false; + } + + // Don't sink instructions into a loop. + if (LI->isLoopHeader(SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Loop header found\n"); + return false; + } + + // Otherwise we are OK with sinking along a critical edge. + DEBUG(dbgs() << "Sinking along critical edge.\n"); + } + + // Determine where to insert into. Skip phi nodes. + BasicBlock::iterator InsertPos = SuccToSinkTo->begin(); + while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos)) + ++InsertPos; + + // Move the instruction. + Inst->moveBefore(InsertPos); + return true; +}
diff --git a/src/LLVM/lib/Transforms/Scalar/TailRecursionElimination.cpp b/src/LLVM/lib/Transforms/Scalar/TailRecursionElimination.cpp new file mode 100644 index 0000000..e21eb9d --- /dev/null +++ b/src/LLVM/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -0,0 +1,634 @@ +//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file transforms calls of the current function (self recursion) followed +// by a return instruction with a branch to the entry of the function, creating +// a loop. This pass also implements the following extensions to the basic +// algorithm: +// +// 1. Trivial instructions between the call and return do not prevent the +// transformation from taking place, though currently the analysis cannot +// support moving any really useful instructions (only dead ones). +// 2. This pass transforms functions that are prevented from being tail +// recursive by an associative and commutative expression to use an +// accumulator variable, thus compiling the typical naive factorial or +// 'fib' implementation into efficient code. +// 3. TRE is performed if the function returns void, if the return +// returns the result returned by the call, or if the function returns a +// run-time constant on all exits from the function. It is possible, though +// unlikely, that the return returns something else (like constant 0), and +// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in +// the function return the exact same value. +// 4. If it can prove that callees do not access their caller stack frame, +// they are marked as eligible for tail call elimination (by the code +// generator). +// +// There are several improvements that could be made: +// +// 1. If the function has any alloca instructions, these instructions will be +// moved out of the entry block of the function, causing them to be +// evaluated each time through the tail recursion.
Safely keeping allocas +// in the entry block requires analysis to prove that the tail-called +// function does not read or write the stack object. +// 2. Tail recursion is only performed if the call immediately precedes the +// return instruction. It's possible that there could be a jump between +// the call and the return. +// 3. There can be intervening operations between the call and the return that +// prevent the TRE from occurring. For example, there could be GEP's and +// stores to memory that will not be read or written by the call. This +// requires some substantial analysis (such as with DSA) to prove safe to +// move ahead of the call, but doing so could allow many more TREs to be +// performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark. +// 4. The algorithm we use to detect if callees access their caller stack +// frames is very primitive. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "tailcallelim" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +STATISTIC(NumEliminated, "Number of tail calls removed"); +STATISTIC(NumRetDuped, "Number of return duplicated"); +STATISTIC(NumAccumAdded, "Number of accumulators introduced"); + +namespace { + struct TailCallElim : public FunctionPass { + static char ID; //
Pass identification, replacement for typeid + TailCallElim() : FunctionPass(ID) { + initializeTailCallElimPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + private: + CallInst *FindTRECandidate(Instruction *I, + bool CannotTailCallElimCallsMarkedTail); + bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, + BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail); + bool FoldReturnAndProcessPred(BasicBlock *BB, + ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail); + bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail); + bool CanMoveAboveCall(Instruction *I, CallInst *CI); + Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI); + }; +} + +char TailCallElim::ID = 0; +INITIALIZE_PASS(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false) + +// Public interface to the TailCallElimination pass +FunctionPass *llvm::createTailCallEliminationPass() { + return new TailCallElim(); +} + +/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by +/// callees of this function. We only do very simple analysis right now, this +/// could be expanded in the future to use mod/ref information for particular +/// call sites if desired. +static bool AllocaMightEscapeToCalls(AllocaInst *AI) { + // FIXME: do simple 'address taken' analysis. + return true; +} + +/// CheckForEscapingAllocas - Scan the specified basic block for alloca +/// instructions. If it contains any that might be accessed by calls, return +/// true.
+static bool CheckForEscapingAllocas(BasicBlock *BB, + bool &CannotTCETailMarkedCall) { + bool RetVal = false; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + RetVal |= AllocaMightEscapeToCalls(AI); + + // If this alloca is in the body of the function, or if it is a variable + // sized allocation, we cannot tail call eliminate calls marked 'tail' + // with this mechanism. + if (BB != &BB->getParent()->getEntryBlock() || + !isa<ConstantInt>(AI->getArraySize())) + CannotTCETailMarkedCall = true; + } + return RetVal; +} + +bool TailCallElim::runOnFunction(Function &F) { + // If this function is a varargs function, we won't be able to PHI the args + // right, so don't even try to convert it... + if (F.getFunctionType()->isVarArg()) return false; + + BasicBlock *OldEntry = 0; + bool TailCallsAreMarkedTail = false; + SmallVector<PHINode*, 8> ArgumentPHIs; + bool MadeChange = false; + bool FunctionContainsEscapingAllocas = false; + + // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls + // marked with the 'tail' attribute, because doing so would cause the stack + // size to increase (real TCE would deallocate variable sized allocas, TCE + // doesn't). + bool CannotTCETailMarkedCall = false; + + // Loop over the function, looking for any returning blocks, and keeping track + // of whether this function has any non-trivially used allocas. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall) + break; + + FunctionContainsEscapingAllocas |= + CheckForEscapingAllocas(BB, CannotTCETailMarkedCall); + } + + /// FIXME: The code generator produces really bad code when an 'escaping + /// alloca' is changed from being a static alloca to being a dynamic alloca. + /// Until this is resolved, disable this transformation if that would ever + /// happen. This bug is PR962.
+ if (FunctionContainsEscapingAllocas) + return false; + + // Second pass, change any tail calls to loops. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { + bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, + ArgumentPHIs,CannotTCETailMarkedCall); + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, + TailCallsAreMarkedTail, ArgumentPHIs, + CannotTCETailMarkedCall); + MadeChange |= Change; + } + } + + // If we eliminated any tail recursions, it's possible that we inserted some + // silly PHI nodes which just merge an initial value (the incoming operand) + // with themselves. Check to see if we did and clean up our mess if so. This + // occurs when a function passes an argument straight through to its tail + // call. + if (!ArgumentPHIs.empty()) { + for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { + PHINode *PN = ArgumentPHIs[i]; + + // If the PHI Node is a dynamic constant, replace it with the value it is. + if (Value *PNV = SimplifyInstruction(PN)) { + PN->replaceAllUsesWith(PNV); + PN->eraseFromParent(); + } + } + } + + // Finally, if this function contains no escaping allocas and does not call a + // returns-twice function (e.g. setjmp), mark all calls in the function as + // eligible for tail calls (there is no stack memory for them to access). + if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice()) + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + CI->setTailCall(); + MadeChange = true; + } + + return MadeChange; +} + + +/// CanMoveAboveCall - Return true if it is safe to move the specified +/// instruction from after the call to before the call, assuming that all +/// instructions between the call and this instruction are movable.
+/// +bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { + // FIXME: We can move load/store/call/free instructions above the call if the + // call does not mod/ref the memory location being processed. + if (I->mayHaveSideEffects()) // This also handles volatile loads. + return false; + + if (LoadInst *L = dyn_cast<LoadInst>(I)) { + // Loads may always be moved above calls without side effects. + if (CI->mayHaveSideEffects()) { + // Non-volatile loads may be moved above a call with side effects if it + // does not write to memory and the load provably won't trap. + // FIXME: Writes to memory only matter if they may alias the pointer + // being loaded from. + if (CI->mayWriteToMemory() || + !isSafeToLoadUnconditionally(L->getPointerOperand(), L, + L->getAlignment())) + return false; + } + } + + // Otherwise, if this is a side-effect free instruction, check to make sure + // that it does not use the return value of the call. If it doesn't use the + // return value of the call, it must only use things that are defined before + // the call, or movable instructions between the call and the instruction + // itself. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (I->getOperand(i) == CI) + return false; + return true; +} + +// isDynamicConstant - Return true if the specified value is the same when the +// return would exit as it was when the initial iteration of the recursive +// function was executed. +// +// We currently handle static constants and arguments that are not modified as +// part of the recursion. +// +static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { + if (isa<Constant>(V)) return true; // Static constants are always dyn consts + + // Check to see if this is an immutable argument, if so, the value + // will be available to initialize the accumulator. + if (Argument *Arg = dyn_cast<Argument>(V)) { + // Figure out which argument number this is...
+ unsigned ArgNo = 0; + Function *F = CI->getParent()->getParent(); + for (Function::arg_iterator AI = F->arg_begin(); &*AI != Arg; ++AI) + ++ArgNo; + + // If we are passing this argument into call as the corresponding + // argument operand, then the argument is dynamically constant. + // Otherwise, we cannot transform this function safely. + if (CI->getArgOperand(ArgNo) == Arg) + return true; + } + + // Switch cases are always constant integers. If the value is being switched + // on and the return is only reachable from one of its cases, it's + // effectively constant. + if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor()) + if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator())) + if (SI->getCondition() == V) + return SI->getDefaultDest() != RI->getParent(); + + // Not a constant or immutable argument, we can't safely transform. + return false; +} + +// getCommonReturnValue - Check to see if the function containing the specified +// tail call consistently returns the same runtime-constant value at all exit +// points except for IgnoreRI. If so, return the returned value. +// +static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { + Function *F = CI->getParent()->getParent(); + Value *ReturnedValue = 0; + + for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) { + ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()); + if (RI == 0 || RI == IgnoreRI) continue; + + // We can only perform this transformation if the value returned is + // evaluatable at the start of the initial invocation of the function, + // instead of at the end of the evaluation. + // + Value *RetOp = RI->getOperand(0); + if (!isDynamicConstant(RetOp, CI, RI)) + return 0; + + if (ReturnedValue && RetOp != ReturnedValue) + return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp; + } + return ReturnedValue; +} + +/// CanTransformAccumulatorRecursion - If the specified instruction can be +/// transformed using accumulator recursion elimination, return the constant +/// which is the start of the accumulator value. Otherwise return null. +/// +Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, + CallInst *CI) { + if (!I->isAssociative() || !I->isCommutative()) return 0; + assert(I->getNumOperands() == 2 && + "Associative/commutative operations should have 2 args!"); + + // Exactly one operand should be the result of the call instruction. + if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || + (I->getOperand(0) != CI && I->getOperand(1) != CI)) + return 0; + + // The only user of this instruction we allow is a single return instruction. + if (!I->hasOneUse() || !isa<ReturnInst>(I->use_back())) + return 0; + + // Ok, now we have to check all of the other return instructions in this + // function. If they return non-constants or differing values, then we cannot + // transform the function safely. + return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI); +} + +static Instruction *FirstNonDbg(BasicBlock::iterator I) { + while (isa<DbgInfoIntrinsic>(I)) + ++I; + return &*I; +} + +CallInst* +TailCallElim::FindTRECandidate(Instruction *TI, + bool CannotTailCallElimCallsMarkedTail) { + BasicBlock *BB = TI->getParent(); + Function *F = BB->getParent(); + + if (&BB->front() == TI) // Make sure there is something before the terminator. + return 0; + + // Scan backwards from the return, checking to see if there is a tail call in + // this block. If so, set CI to it. + CallInst *CI = 0; + BasicBlock::iterator BBI = TI; + while (true) { + CI = dyn_cast<CallInst>(BBI); + if (CI && CI->getCalledFunction() == F) + break; + + if (BBI == BB->begin()) + return 0; // Didn't find a potential tail call.
+ --BBI; + } + + // If this call is marked as a tail call, and if there are dynamic allocas in + // the function, we cannot perform this optimization. + if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) + return 0; + + // As a special case, detect code like this: + // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call + // and disable this xform in this case, because the code generator will + // lower the call to fabs into inline code. + if (BB == &F->getEntryBlock() && + FirstNonDbg(BB->front()) == CI && + FirstNonDbg(llvm::next(BB->begin())) == TI && + callIsSmall(F)) { + // A single-block function with just a call and a return. Check that + // the arguments match. + CallSite::arg_iterator I = CallSite(CI).arg_begin(), + E = CallSite(CI).arg_end(); + Function::arg_iterator FI = F->arg_begin(), + FE = F->arg_end(); + for (; I != E && FI != FE; ++I, ++FI) + if (*I != &*FI) break; + if (I == E && FI == FE) + return 0; + } + + return CI; +} + +bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, + BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail) { + // If we are introducing accumulator recursion to eliminate operations after + // the call instruction that are both associative and commutative, the initial + // value for the accumulator is placed in this variable. If this value is set + // then we actually perform accumulator recursion elimination instead of + // simple tail recursion elimination. If the operation is an LLVM instruction + // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then + // we are handling the case when the return instruction returns a constant C + // which is different to the constant returned by other return instructions + // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a + // special case of accumulator recursion, the operation being "return C".
+ Value *AccumulatorRecursionEliminationInitVal = 0; + Instruction *AccumulatorRecursionInstr = 0; + + // Ok, we found a potential tail call. We can currently only transform the + // tail call if all of the instructions between the call and the return are + // movable to above the call itself, leaving the call next to the return. + // Check that this is the case now. + BasicBlock::iterator BBI = CI; + for (++BBI; &*BBI != Ret; ++BBI) { + if (CanMoveAboveCall(BBI, CI)) continue; + + // If we can't move the instruction above the call, it might be because it + // is an associative and commutative operation that could be transformed + // using accumulator recursion elimination. Check to see if this is the + // case, and if so, remember the initial accumulator value for later. + if ((AccumulatorRecursionEliminationInitVal = + CanTransformAccumulatorRecursion(BBI, CI))) { + // Yes, this is accumulator recursion. Remember which instruction + // accumulates. + AccumulatorRecursionInstr = BBI; + } else { + return false; // Otherwise, we cannot eliminate the tail recursion! + } + } + + // We can only transform call/return pairs that either ignore the return value + // of the call and return void, ignore the value of the call and return a + // constant, return the value returned by the tail call, or that are being + // accumulator recursion variable eliminated. + if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && + !isa<UndefValue>(Ret->getReturnValue()) && + AccumulatorRecursionEliminationInitVal == 0 && + !getCommonReturnValue(0, CI)) { + // One case remains that we are able to handle: the current return + // instruction returns a constant, and all other return instructions + // return a different constant. + if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) + return false; // Current return instruction does not return a constant. + // Check that all other return instructions return a common constant.
If + // so, record it in AccumulatorRecursionEliminationInitVal. + AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); + if (!AccumulatorRecursionEliminationInitVal) + return false; + } + + BasicBlock *BB = Ret->getParent(); + Function *F = BB->getParent(); + + // OK! We can transform this tail call. If this is the first one found, + // create the new entry block, allowing us to branch back to the old entry. + if (OldEntry == 0) { + OldEntry = &F->getEntryBlock(); + BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); + NewEntry->takeName(OldEntry); + OldEntry->setName("tailrecurse"); + BranchInst::Create(OldEntry, NewEntry); + + // If this tail call is marked 'tail' and if there are any allocas in the + // entry block, move them up to the new entry block. + TailCallsAreMarkedTail = CI->isTailCall(); + if (TailCallsAreMarkedTail) + // Move all fixed sized allocas from OldEntry to NewEntry. + for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(), + NEBI = NewEntry->begin(); OEBI != E; ) + if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++)) + if (isa<ConstantInt>(AI->getArraySize())) + AI->moveBefore(NEBI); + + // Now that we have created a new block, which jumps to the entry + // block, insert a PHI node for each argument of the function. + // For now, we initialize each PHI to only have the real arguments + // which are passed in. + Instruction *InsertPos = OldEntry->begin(); + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) { + PHINode *PN = PHINode::Create(I->getType(), 2, + I->getName() + ".tr", InsertPos); + I->replaceAllUsesWith(PN); // Everyone use the PHI node now! + PN->addIncoming(I, NewEntry); + ArgumentPHIs.push_back(PN); + } + } + + // If this function has self recursive calls in the tail position where some + // are marked tail and some are not, only transform one flavor or another.
We + // have to choose whether we move allocas in the entry block to the new entry + // block or not, so we can't make a good choice for both. NOTE: We could do + // slightly better here in the case that the function has no entry block + // allocas. + if (TailCallsAreMarkedTail && !CI->isTailCall()) + return false; + + // Ok, now that we know we have a pseudo-entry block WITH all of the + // required PHI nodes, add entries into the PHI node for the actual + // parameters passed into the tail-recursive call. + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) + ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB); + + // If we are introducing an accumulator variable to eliminate the recursion, + // do so now. Note that we _know_ that no subsequent tail recursion + // eliminations will happen on this function because of the way the + // accumulator recursion predicate is set up. + // + if (AccumulatorRecursionEliminationInitVal) { + Instruction *AccRecInstr = AccumulatorRecursionInstr; + // Start by inserting a new PHI node for the accumulator. + pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); + PHINode *AccPN = + PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), + std::distance(PB, PE) + 1, + "accumulator.tr", OldEntry->begin()); + + // Loop over all of the predecessors of the tail recursion block. For the + // real entry into the function we seed the PHI with the initial value, + // computed earlier. For any other existing branches to this block (due to + // other tail recursions eliminated) the accumulator is not modified. + // Because we haven't added the branch in the current block to OldEntry yet, + // it will not show up as a predecessor.
+ for (pred_iterator PI = PB; PI != PE; ++PI) { + BasicBlock *P = *PI; + if (P == &F->getEntryBlock()) + AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); + else + AccPN->addIncoming(AccPN, P); + } + + if (AccRecInstr) { + // Add an incoming argument for the current block, which is computed by + // our associative and commutative accumulator instruction. + AccPN->addIncoming(AccRecInstr, BB); + + // Next, rewrite the accumulator recursion instruction so that it does not + // use the result of the call anymore, instead, use the PHI node we just + // inserted. + AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); + } else { + // Add an incoming argument for the current block, which is just the + // constant returned by the current return instruction. + AccPN->addIncoming(Ret->getReturnValue(), BB); + } + + // Finally, rewrite any return instructions in the program to return the PHI + // node instead of the "initval" that they do currently. This loop will + // actually rewrite the return value we are destroying, but that's ok. + for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) + RI->setOperand(0, AccPN); + ++NumAccumAdded; + } + + // Now that all of the PHI nodes are in place, remove the call and + // ret instructions, replacing them with an unconditional branch. + BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); + NewBI->setDebugLoc(CI->getDebugLoc()); + + BB->getInstList().erase(Ret); // Remove return. + BB->getInstList().erase(CI); // Remove call.
+ ++NumEliminated; + return true; +} + +bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB, + ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail) { + bool Change = false; + + // If the return block contains nothing but the return and PHI's, + // there might be an opportunity to duplicate the return in its + // predecessors and perform TRE there. Look for predecessors that end + // in unconditional branch and recursive call(s). + SmallVector<BranchInst*, 8> UncondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *Pred = *PI; + TerminatorInst *PTI = Pred->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) + if (BI->isUnconditional()) + UncondBranchPreds.push_back(BI); + } + + while (!UncondBranchPreds.empty()) { + BranchInst *BI = UncondBranchPreds.pop_back_val(); + BasicBlock *Pred = BI->getParent(); + if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){ + DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); + EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred), + OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, + CannotTailCallElimCallsMarkedTail); + ++NumRetDuped; + Change = true; + } + } + + return Change; +} + +bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVector<PHINode*, 8> &ArgumentPHIs, + bool CannotTailCallElimCallsMarkedTail) { + CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); + if (!CI) + return false; + + return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, + ArgumentPHIs, + CannotTailCallElimCallsMarkedTail); +}
diff --git a/src/LLVM/lib/Transforms/Utils/AddrModeMatcher.cpp b/src/LLVM/lib/Transforms/Utils/AddrModeMatcher.cpp index 4d64c85..8e5a1eb 100644 --- a/src/LLVM/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/src/LLVM/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -21,6 +21,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CallSite.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -221,7 +222,7 @@ const TargetData *TD = TLI.getTargetData(); gep_type_iterator GTI = gep_type_begin(AddrInst); for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD->getStructLayout(STy); unsigned Idx = cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); @@ -379,27 +380,10 @@ /// return false. static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI) { - std::vector<InlineAsm::ConstraintInfo> - Constraints = IA->ParseConstraints(); - - unsigned ArgNo = 0; // The argument of the CallInst. - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - TargetLowering::AsmOperandInfo OpInfo(Constraints[i]); - - // Compute the value type for each operand. - switch (OpInfo.Type) { - case InlineAsm::isOutput: - if (OpInfo.isIndirect) - OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); - break; - case InlineAsm::isInput: - OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); - break; - case InlineAsm::isClobber: - // Nothing to do. - break; - } - + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + // Compute the constraint code and ConstraintType to use. 
TLI.ComputeConstraintToUse(OpInfo, SDValue()); @@ -573,7 +557,7 @@ Value *Address = User->getOperand(OpNo); if (!Address->getType()->isPointerTy()) return false; - const Type *AddressAccessTy = + Type *AddressAccessTy = cast<PointerType>(Address->getType())->getElementType(); // Do a match against the root of this address, ignoring profitability. This @@ -584,7 +568,7 @@ MemoryInst, Result); Matcher.IgnoreProfitability = true; bool Success = Matcher.MatchAddr(Address, 0); - Success = Success; assert(Success && "Couldn't select *anything*?"); + (void)Success; assert(Success && "Couldn't select *anything*?"); // If the match didn't cover I, then it won't be shared by it. if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
diff --git a/src/LLVM/lib/Transforms/Utils/BasicBlockUtils.cpp b/src/LLVM/lib/Transforms/Utils/BasicBlockUtils.cpp index 35bac77..a7f9efd 100644 --- a/src/LLVM/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/src/LLVM/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -19,8 +19,9 @@ #include "llvm/Constant.h" #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Scalar.h" @@ -63,12 +64,27 @@ /// any single-entry PHI nodes in it, fold them away. This handles the case /// when all entries to the PHI nodes in a block are guaranteed equal, such as /// when the block has exactly one predecessor. -void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) { +void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { + if (!isa<PHINode>(BB->begin())) return; + + AliasAnalysis *AA = 0; + MemoryDependenceAnalysis *MemDep = 0; + if (P) { + AA = P->getAnalysisIfAvailable<AliasAnalysis>(); + MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>(); + } + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { if (PN->getIncomingValue(0) != PN) PN->replaceAllUsesWith(PN->getIncomingValue(0)); else PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + + if (MemDep) + MemDep->removeInstruction(PN); // Memdep updates AA itself. + else if (AA && isa<PointerType>(PN->getType())) + AA->deleteValue(PN); + PN->eraseFromParent(); } } @@ -97,30 +113,20 @@ /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor, /// if possible. The return value indicates success or failure. bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { - pred_iterator PI(pred_begin(BB)), PE(pred_end(BB)); - // Can't merge the entry block. Don't merge away blocks who have their - // address taken: this is a bug if the predecessor block is the entry node - // (because we'd end up taking the address of the entry) and undesirable in - // any case. 
- if (pred_begin(BB) == pred_end(BB) || - BB->hasAddressTaken()) return false; + // Don't merge away blocks who have their address taken. + if (BB->hasAddressTaken()) return false; - BasicBlock *PredBB = *PI++; - for (; PI != PE; ++PI) // Search all predecessors, see if they are all same - if (*PI != PredBB) { - PredBB = 0; // There are multiple different predecessors... - break; - } - - // Can't merge if there are multiple predecessors. + // Can't merge if there are multiple predecessors, or no predecessors. + BasicBlock *PredBB = BB->getUniquePredecessor(); if (!PredBB) return false; + // Don't break self-loops. if (PredBB == BB) return false; // Don't break invokes. - if (ISA_INVOKE_INST(PredBB->getTerminator())) return false; + if (isa<InvokeInst>(PredBB->getTerminator())) return false; succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); - BasicBlock* OnlySucc = BB; + BasicBlock *OnlySucc = BB; for (; SI != SE; ++SI) if (*SI != OnlySucc) { OnlySucc = 0; // There are multiple distinct successors! @@ -141,45 +147,46 @@ } // Begin by getting rid of unneeded PHIs. - while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { - PN->replaceAllUsesWith(PN->getIncomingValue(0)); - BB->getInstList().pop_front(); // Delete the phi node... - } + if (isa<PHINode>(BB->front())) + FoldSingleEntryPHINodes(BB, P); // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); - // Move all definitions in the successor to the predecessor... - PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); - // Make all PHI nodes that referred to BB now refer to Pred as their // source... BB->replaceAllUsesWith(PredBB); + // Move all definitions in the successor to the predecessor... + PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + // Inherit predecessors name if it exists. if (!PredBB->hasName()) PredBB->takeName(BB); // Finally, erase the old block and update dominator info. 
if (P) { - if (DominatorTree* DT = P->getAnalysisIfAvailable<DominatorTree>()) { - DomTreeNode* DTN = DT->getNode(BB); - DomTreeNode* PredDTN = DT->getNode(PredBB); - - if (DTN) { - SmallPtrSet<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); - for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = Children.begin(), + if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + if (DomTreeNode *DTN = DT->getNode(BB)) { + DomTreeNode *PredDTN = DT->getNode(PredBB); + SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); + for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(), DE = Children.end(); DI != DE; ++DI) DT->changeImmediateDominator(*DI, PredDTN); DT->eraseNode(BB); } + + if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) + LI->removeBlock(BB); + + if (MemoryDependenceAnalysis *MD = + P->getAnalysisIfAvailable<MemoryDependenceAnalysis>()) + MD->invalidateCachedPredecessors(); } } BB->eraseFromParent(); - - return true; } @@ -228,50 +235,6 @@ ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); } -/// RemoveSuccessor - Change the specified terminator instruction such that its -/// successor SuccNum no longer exists. Because this reduces the outgoing -/// degree of the current basic block, the actual terminator instruction itself -/// may have to be changed. In the case where the last successor of the block -/// is deleted, a return instruction is inserted in its place which can cause a -/// surprising change in program behavior if it is not expected. -/// -void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { - assert(SuccNum < TI->getNumSuccessors() && - "Trying to remove a nonexistant successor!"); - - // If our old successor block contains any PHI nodes, remove the entry in the - // PHI nodes that comes from this branch... 
- // - BasicBlock *BB = TI->getParent(); - TI->getSuccessor(SuccNum)->removePredecessor(BB); - - TerminatorInst *NewTI = 0; - switch (TI->getOpcode()) { - case Instruction::Br: - // If this is a conditional branch... convert to unconditional branch. - if (TI->getNumSuccessors() == 2) { - cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum)); - } else { // Otherwise convert to a return instruction... - Value *RetVal = 0; - - // Create a value to return... if the function doesn't return null... - if (!BB->getParent()->getReturnType()->isVoidTy()) - RetVal = Constant::getNullValue(BB->getParent()->getReturnType()); - - // Create the return... - NewTI = ReturnInst::Create(TI->getContext(), RetVal); - } - break; - case Instruction::Switch: // Should remove entry - default: - case Instruction::Ret: // Cannot happen, has no successors! - llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!"); - } - - if (NewTI) // If it's a different instruction, replace. - ReplaceInstWithInst(TI, NewTI); -} - /// GetSuccessorNumber - Search for the specified successor of basic block BB /// and return its position in the terminator instruction's list of /// successors. It is an error to call this with a block that is not a @@ -308,13 +271,13 @@ assert(SP == BB && "CFG broken"); SP = NULL; return SplitBlock(Succ, Succ->begin(), P); - } else { - // Otherwise, if BB has a single successor, split it at the bottom of the - // block. - assert(BB->getTerminator()->getNumSuccessors() == 1 && - "Should have a single succ!"); - return SplitBlock(BB, BB->getTerminator(), P); } + + // Otherwise, if BB has a single successor, split it at the bottom of the + // block. 
+ assert(BB->getTerminator()->getNumSuccessors() == 1 && + "Should have a single succ!"); + return SplitBlock(BB, BB->getTerminator(), P); } /// SplitBlock - Split the specified block at the specified instruction - every @@ -324,36 +287,159 @@ /// BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { BasicBlock::iterator SplitIt = SplitPt; - while (isa<PHINode>(SplitIt)) + while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt)) ++SplitIt; - BasicBlock *New = Old->splitBasicBlock(SplitIt); + BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); // The new block lives in whichever loop the old one did. This preserves // LCSSA as well, because we force the split point to be after any PHI nodes. - if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>()) + if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) if (Loop *L = LI->getLoopFor(Old)) L->addBasicBlockToLoop(New, LI->getBase()); if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { - // Old dominates New. New node domiantes all other nodes dominated by Old. - DomTreeNode *OldNode = DT->getNode(Old); - std::vector<DomTreeNode *> Children; - for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); - I != E; ++I) - Children.push_back(*I); + // Old dominates New. New node dominates all other nodes dominated by Old. + if (DomTreeNode *OldNode = DT->getNode(Old)) { + std::vector<DomTreeNode *> Children; + for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); + I != E; ++I) + Children.push_back(*I); DomTreeNode *NewNode = DT->addNewBlock(New,Old); for (std::vector<DomTreeNode *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) DT->changeImmediateDominator(*I, NewNode); + } } - if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>()) - DF->splitBlock(Old); - return New; } +/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCCSA +/// analysis information. 
+static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, + ArrayRef<BasicBlock *> Preds, + Pass *P, bool &HasLoopExit) { + if (!P) return; + + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + Loop *L = LI ? LI->getLoopFor(OldBB) : 0; + + // If we need to preserve loop analyses, collect some information about how + // this split will affect loops. + bool IsLoopEntry = !!L; + bool SplitMakesNewLoopHeader = false; + if (LI) { + bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); + for (ArrayRef<BasicBlock*>::iterator + i = Preds.begin(), e = Preds.end(); i != e; ++i) { + BasicBlock *Pred = *i; + + // If we need to preserve LCSSA, determine if any of the preds is a loop + // exit. + if (PreserveLCSSA) + if (Loop *PL = LI->getLoopFor(Pred)) + if (!PL->contains(OldBB)) + HasLoopExit = true; + + // If we need to preserve LoopInfo, note whether any of the preds crosses + // an interesting loop boundary. + if (!L) continue; + if (L->contains(Pred)) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; + } + } + + // Update dominator tree if available. + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + if (DT) + DT->splitBlock(NewBB); + + if (!L) return; + + if (IsLoopEntry) { + // Add the new block to the nearest enclosing loop (and not an adjacent + // loop). To find this, examine each of the predecessors and determine which + // loops enclose them, and select the most-nested loop which contains the + // loop containing the block being split. + Loop *InnermostPredLoop = 0; + for (ArrayRef<BasicBlock*>::iterator + i = Preds.begin(), e = Preds.end(); i != e; ++i) { + BasicBlock *Pred = *i; + if (Loop *PredLoop = LI->getLoopFor(Pred)) { + // Seek a loop which actually contains the block being split (to avoid + // adjacent loops). + while (PredLoop && !PredLoop->contains(OldBB)) + PredLoop = PredLoop->getParentLoop(); + + // Select the most-nested of these loops which contains the block. 
+ if (PredLoop && PredLoop->contains(OldBB) && + (!InnermostPredLoop || + InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) + InnermostPredLoop = PredLoop; + } + } + + if (InnermostPredLoop) + InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else { + L->addBasicBlockToLoop(NewBB, LI->getBase()); + if (SplitMakesNewLoopHeader) + L->moveToHeader(NewBB); + } +} + +/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming +/// from NewBB. This also updates AliasAnalysis, if available. +static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, + ArrayRef<BasicBlock*> Preds, BranchInst *BI, + Pass *P, bool HasLoopExit) { + // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. + AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; + for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I++); + + // Check to see if all of the values coming in are the same. If so, we + // don't need to create a new PHI node, unless it's needed for LCSSA. + Value *InVal = 0; + if (!HasLoopExit) { + InVal = PN->getIncomingValueForBlock(Preds[0]); + for (unsigned i = 1, e = Preds.size(); i != e; ++i) + if (InVal != PN->getIncomingValueForBlock(Preds[i])) { + InVal = 0; + break; + } + } + + if (InVal) { + // If all incoming values for the new PHI would be the same, just don't + // make a new PHI. Instead, just remove the incoming values from the old + // PHI. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) + PN->removeIncomingValue(Preds[i], false); + } else { + // If the values coming into the block are not the same, we need a PHI. + // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); + if (AA) AA->copyValue(PN, NewPHI); + + // Move all of the PHI values for 'Preds' to the new PHI. 
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + Value *V = PN->removeIncomingValue(Preds[i], false); + NewPHI->addIncoming(V, Preds[i]); + } + + InVal = NewPHI; + } + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + } +} /// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to @@ -362,66 +448,31 @@ /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, -/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. -/// In particular, it does not preserve LoopSimplify (because it's -/// complicated to handle the case where one of the edges being split -/// is an exit of a loop with other exits). +/// LoopInfo, and LCCSA but no other analyses. In particular, it does not +/// preserve LoopSimplify (because it's complicated to handle the case where one +/// of the edges being split is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, - unsigned NumPreds, + unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. - BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); - LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; - Loop *L = LI ? LI->getLoopFor(BB) : 0; - bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); - // Move the edges from Preds to point to NewBB instead of BB. - // While here, if we need to preserve loop analyses, collect - // some information about how this split will affect loops. 
- bool HasLoopExit = false; - bool IsLoopEntry = !!L; - bool SplitMakesNewLoopHeader = false; for (unsigned i = 0; i != NumPreds; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); - Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); - - if (LI) { - // If we need to preserve LCSSA, determine if any of - // the preds is a loop exit. - if (PreserveLCSSA) - if (Loop *PL = LI->getLoopFor(Preds[i])) - if (!PL->contains(BB)) - HasLoopExit = true; - // If we need to preserve LoopInfo, note whether any of the - // preds crosses an interesting loop boundary. - if (L) { - if (L->contains(Preds[i])) - IsLoopEntry = false; - else - SplitMakesNewLoopHeader = true; - } - } } - // Update dominator tree and dominator frontier if available. - DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; - if (DT) - DT->splitBlock(NewBB); - if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) - DF->splitBlock(NewBB); - // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to @@ -433,82 +484,124 @@ return NewBB; } - AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), + P, HasLoopExit); - if (L) { - if (IsLoopEntry) { - // Add the new block to the nearest enclosing loop (and not an - // adjacent loop). 
To find this, examine each of the predecessors and - // determine which loops enclose them, and select the most-nested loop - // which contains the loop containing the block being split. - Loop *InnermostPredLoop = 0; - for (unsigned i = 0; i != NumPreds; ++i) - if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { - // Seek a loop which actually contains the block being split (to - // avoid adjacent loops). - while (PredLoop && !PredLoop->contains(BB)) - PredLoop = PredLoop->getParentLoop(); - // Select the most-nested of these loops which contains the block. - if (PredLoop && - PredLoop->contains(BB) && - (!InnermostPredLoop || - InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) - InnermostPredLoop = PredLoop; - } - if (InnermostPredLoop) - InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - } else { - L->addBasicBlockToLoop(NewBB, LI->getBase()); - if (SplitMakesNewLoopHeader) - L->moveToHeader(NewBB); - } - } - - // Otherwise, create a new PHI node in NewBB for each PHI node in BB. - for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { - PHINode *PN = cast<PHINode>(I++); - - // Check to see if all of the values coming in are the same. If so, we - // don't need to create a new PHI node, unless it's needed for LCSSA. - Value *InVal = 0; - if (!HasLoopExit) { - InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 1; i != NumPreds; ++i) - if (InVal != PN->getIncomingValueForBlock(Preds[i])) { - InVal = 0; - break; - } - } - - if (InVal) { - // If all incoming values for the new PHI would be the same, just don't - // make a new PHI. Instead, just remove the incoming values from the old - // PHI. - for (unsigned i = 0; i != NumPreds; ++i) - PN->removeIncomingValue(Preds[i], false); - } else { - // If the values coming into the block are not the same, we need a PHI. 
- // Create the new PHI node, insert it into NewBB at the end of the block - PHINode *NewPHI = - PHINode::Create(PN->getType(), BI); - if (AA) AA->copyValue(PN, NewPHI); - - // Move all of the PHI values for 'Preds' to the new PHI. - for (unsigned i = 0; i != NumPreds; ++i) { - Value *V = PN->removeIncomingValue(Preds[i], false); - NewPHI->addIncoming(V, Preds[i]); - } - InVal = NewPHI; - } - - // Add an incoming value to the PHI node in the loop for the preheader - // edge. - PN->addIncoming(InVal, NewBB); - } - + // Update the PHI nodes in BB with the values coming from NewBB. + UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI, + P, HasLoopExit); return NewBB; } +/// SplitLandingPadPredecessors - This method transforms the landing pad, +/// OrigBB, by introducing two new basic blocks into the function. One of those +/// new basic blocks gets the predecessors listed in Preds. The other basic +/// block gets the remaining predecessors of OrigBB. The landingpad instruction +/// OrigBB is clone into both of the new basic blocks. The new blocks are given +/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector. +/// +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular, +/// it does not preserve LoopSimplify (because it's complicated to handle the +/// case where one of the edges being split is an exit of a loop with other +/// exits). +/// +void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock*> Preds, + const char *Suffix1, const char *Suffix2, + Pass *P, + SmallVectorImpl<BasicBlock*> &NewBBs) { + assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); + + // Create a new basic block for OrigBB's predecessors listed in Preds. Insert + // it right before the original block. 
+ BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix1, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB1); + + // The new block unconditionally branches to the old block. + BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1); + + // Move the edges from Preds to point to NewBB1 instead of OrigBB. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); + } + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB1. + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit); + + // Move the remaining edges from OrigBB to point to NewBB2. + SmallVector<BasicBlock*, 8> NewBB2Preds; + for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB); + i != e; ) { + BasicBlock *Pred = *i++; + if (Pred == NewBB1) continue; + assert(!isa<IndirectBrInst>(Pred->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + NewBB2Preds.push_back(Pred); + e = pred_end(OrigBB); + } + + BasicBlock *NewBB2 = 0; + if (!NewBB2Preds.empty()) { + // Create another basic block for the rest of OrigBB's predecessors. + NewBB2 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix2, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB2); + + // The new block unconditionally branches to the old block. + BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2); + + // Move the remaining edges from OrigBB to point to NewBB2. 
+ for (SmallVectorImpl<BasicBlock*>::iterator + i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i) + (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2); + + // Update DominatorTree, LoopInfo, and LCCSA analysis information. + HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB2. + UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit); + } + + LandingPadInst *LPad = OrigBB->getLandingPadInst(); + Instruction *Clone1 = LPad->clone(); + Clone1->setName(Twine("lpad") + Suffix1); + NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1); + + if (NewBB2) { + Instruction *Clone2 = LPad->clone(); + Clone2->setName(Twine("lpad") + Suffix2); + NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); + + // Create a PHI node for the two cloned landingpad instructions. + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + LPad->eraseFromParent(); + } else { + // There is no second clone. Just replace the landing pad with the first + // clone. + LPad->replaceAllUsesWith(Clone1); + LPad->eraseFromParent(); + } +} + /// FindFunctionBackedges - Analyze the specified function to find all of the /// loop backedges in the function and return them. This is a relatively cheap /// (compared to computing dominators and loop info) analysis. @@ -552,7 +645,42 @@ // Go up one level. InStack.erase(VisitStack.pop_back_val().first); } - } while (!VisitStack.empty()); - - + } while (!VisitStack.empty()); +} + +/// FoldReturnIntoUncondBranch - This method duplicates the specified return +/// instruction into a predecessor which ends in an unconditional branch. If +/// the return instruction returns a value defined by a PHI, propagate the +/// right value into the return. 
It returns the new return instruction in the +/// predecessor. +ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, + BasicBlock *Pred) { + Instruction *UncondBranch = Pred->getTerminator(); + // Clone the return and add it to the end of the predecessor. + Instruction *NewRet = RI->clone(); + Pred->getInstList().push_back(NewRet); + + // If the return instruction returns a value, and if the value was a + // PHI node in "BB", propagate the right value into the return. + for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); + i != e; ++i) + if (PHINode *PN = dyn_cast<PHINode>(*i)) + if (PN->getParent() == BB) + *i = PN->getIncomingValueForBlock(Pred); + + // Update any PHI nodes in the returning block to realize that we no + // longer branch to them. + BB->removePredecessor(Pred); + UncondBranch->eraseFromParent(); + return cast<ReturnInst>(NewRet); +} + +/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a +/// given basic block. +DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) { + if (const Instruction *I = BB->getFirstNonPHI()) + return I->getDebugLoc(); + // Scanning entire block may be too expensive, if the first instruction + // does not have valid location info. + return DebugLoc(); }
diff --git a/src/LLVM/lib/Transforms/Utils/BasicInliner.cpp b/src/LLVM/lib/Transforms/Utils/BasicInliner.cpp new file mode 100644 index 0000000..23a30cc --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/BasicInliner.cpp
@@ -0,0 +1,182 @@ +//===- BasicInliner.cpp - Basic function level inliner --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a simple function based inliner that does not use +// call graph information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "basicinliner" +#include "llvm/Module.h" +#include "llvm/Function.h" +#include "llvm/Transforms/Utils/BasicInliner.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <vector> + +using namespace llvm; + +static cl::opt<unsigned> +BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200), + cl::desc("Control the amount of basic inlining to perform (default = 200)")); + +namespace llvm { + + /// BasicInlinerImpl - BasicInliner implementation class. This hides + /// container info, used by basic inliner, from public interface. + struct BasicInlinerImpl { + + BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT + void operator=(const BasicInlinerImpl&); // DO NOT IMPLEMENT + public: + BasicInlinerImpl(TargetData *T) : TD(T) {} + + /// addFunction - Add function into the list of functions to process. + /// All functions must be inserted using this interface before invoking + /// inlineFunctions(). + void addFunction(Function *F) { + Functions.push_back(F); + } + + /// neverInlineFunction - Sometimes a function is never to be inlined + /// because of one or other reason. 
+ void neverInlineFunction(Function *F) { + NeverInline.insert(F); + } + + /// inlineFunctions - Walk all call sites in all functions supplied by + /// client. Inline as many call sites as possible. Delete completely + /// inlined functions. + void inlineFunctions(); + + private: + TargetData *TD; + std::vector<Function *> Functions; + SmallPtrSet<const Function *, 16> NeverInline; + SmallPtrSet<Function *, 8> DeadFunctions; + InlineCostAnalyzer CA; + }; + +/// inlineFunctions - Walk all call sites in all functions supplied by +/// client. Inline as many call sites as possible. Delete completely +/// inlined functions. +void BasicInlinerImpl::inlineFunctions() { + + // Scan through and identify all call sites ahead of time so that we only + // inline call sites in the original functions, not call sites that result + // from inlining other functions. + std::vector<CallSite> CallSites; + + for (std::vector<Function *>::iterator FI = Functions.begin(), + FE = Functions.end(); FI != FE; ++FI) { + Function *F = *FI; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + CallSite CS(cast<Value>(I)); + if (CS && CS.getCalledFunction() + && !CS.getCalledFunction()->isDeclaration()) + CallSites.push_back(CS); + } + } + + DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); + + // Inline call sites. + bool Changed = false; + do { + Changed = false; + for (unsigned index = 0; index != CallSites.size() && !CallSites.empty(); + ++index) { + CallSite CS = CallSites[index]; + if (Function *Callee = CS.getCalledFunction()) { + + // Eliminate calls that are never inlinable. 
+ if (Callee->isDeclaration() || + CS.getInstruction()->getParent()->getParent() == Callee) { + CallSites.erase(CallSites.begin() + index); + --index; + continue; + } + InlineCost IC = CA.getInlineCost(CS, NeverInline); + if (IC.isAlways()) { + DEBUG(dbgs() << " Inlining: cost=always" + <<", call: " << *CS.getInstruction()); + } else if (IC.isNever()) { + DEBUG(dbgs() << " NOT Inlining: cost=never" + <<", call: " << *CS.getInstruction()); + continue; + } else { + int Cost = IC.getValue(); + + if (Cost >= (int) BasicInlineThreshold) { + DEBUG(dbgs() << " NOT Inlining: cost = " << Cost + << ", call: " << *CS.getInstruction()); + continue; + } else { + DEBUG(dbgs() << " Inlining: cost = " << Cost + << ", call: " << *CS.getInstruction()); + } + } + + // Inline + InlineFunctionInfo IFI(0, TD); + if (InlineFunction(CS, IFI)) { + if (Callee->use_empty() && (Callee->hasLocalLinkage() || + Callee->hasAvailableExternallyLinkage())) + DeadFunctions.insert(Callee); + Changed = true; + CallSites.erase(CallSites.begin() + index); + --index; + } + } + } + } while (Changed); + + // Remove completely inlined functions from module. + for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(), + E = DeadFunctions.end(); I != E; ++I) { + Function *D = *I; + Module *M = D->getParent(); + M->getFunctionList().remove(D); + } +} + +BasicInliner::BasicInliner(TargetData *TD) { + Impl = new BasicInlinerImpl(TD); +} + +BasicInliner::~BasicInliner() { + delete Impl; +} + +/// addFunction - Add function into the list of functions to process. +/// All functions must be inserted using this interface before invoking +/// inlineFunctions(). +void BasicInliner::addFunction(Function *F) { + Impl->addFunction(F); +} + +/// neverInlineFunction - Sometimes a function is never to be inlined because +/// of one or other reason. 
+void BasicInliner::neverInlineFunction(Function *F) { + Impl->neverInlineFunction(F); +} + +/// inlineFunctions - Walk all call sites in all functions supplied by +/// client. Inline as many call sites as possible. Delete completely +/// inlined functions. +void BasicInliner::inlineFunctions() { + Impl->inlineFunctions(); +} + +}
diff --git a/src/LLVM/lib/Transforms/Utils/BreakCriticalEdges.cpp b/src/LLVM/lib/Transforms/Utils/BreakCriticalEdges.cpp index ffe6399..c052910 100644 --- a/src/LLVM/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/src/LLVM/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -11,8 +11,7 @@ // inserting a dummy basic block. This pass may be "required" by passes that // cannot deal with critical edges. For this usage, the structure type is // forward declared. This pass obviously invalidates the CFG, but can update -// forward dominator (set, immediate dominators, tree, and frontier) -// information. +// dominator trees. // //===----------------------------------------------------------------------===// @@ -36,13 +35,14 @@ namespace { struct BreakCriticalEdges : public FunctionPass { static char ID; // Pass identification, replacement for typeid - BreakCriticalEdges() : FunctionPass(ID) {} + BreakCriticalEdges() : FunctionPass(ID) { + initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); AU.addPreserved<LoopInfo>(); AU.addPreserved<ProfileInfo>(); @@ -53,10 +53,10 @@ } char BreakCriticalEdges::ID = 0; -static RegisterPass<BreakCriticalEdges> -X("break-crit-edges", "Break critical edges in CFG"); +INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", + "Break critical edges in CFG", false, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; FunctionPass *llvm::createBreakCriticalEdgesPass() { return new BreakCriticalEdges(); @@ -102,7 +102,7 @@ ++I; // Skip one edge due to the incoming arc from TI. if (!AllowIdenticalEdges) return I != E; - + // If AllowIdenticalEdges is true, then we allow this edge to be considered // non-critical iff all preds come from TI's block. while (I != E) { @@ -140,7 +140,7 @@ if (VP->getParent() == SplitBB) continue; // Otherwise a new PHI is needed. Create one and populate it. 
- PHINode *NewPN = PHINode::Create(PN->getType(), + PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split", SplitBB->getTerminator()); for (unsigned i = 0, e = Preds.size(); i != e; ++i) NewPN->addIncoming(V, Preds[i]); @@ -150,16 +150,15 @@ } /// SplitCriticalEdge - If this edge is a critical edge, insert a new node to -/// split the critical edge. This will update DominatorTree and -/// DominatorFrontier information if it is available, thus calling this pass -/// will not invalidate either of them. This returns the new block if the edge -/// was split, null otherwise. +/// split the critical edge. This will update DominatorTree information if it +/// is available, thus calling this pass will not invalidate either of them. +/// This returns the new block if the edge was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the -/// specified successor will be merged into the same critical edge block. -/// This is most commonly interesting with switch instructions, which may +/// specified successor will be merged into the same critical edge block. +/// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that -/// dest go to one block instead of each going to a different block, but isn't +/// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an @@ -168,19 +167,26 @@ /// to. 
/// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, - Pass *P, bool MergeIdenticalEdges) { + Pass *P, bool MergeIdenticalEdges, + bool DontDeleteUselessPhis) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; - + assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); - + BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); + // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // it in this generic function. + if (DestBB->isLandingPad()) return 0; + // Create a new basic block, linking it into the CFG. - BasicBlock *NewBB = BasicBlock::Create(TI->getContext()); + BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), + TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch. - BranchInst::Create(DestBB, NewBB); + BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); + NewBI->setDebugLoc(TI->getDebugLoc()); // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); @@ -189,77 +195,54 @@ Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); - + // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. - if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) { - // This conceptually does: - // foreach (PHINode *PN in DestBB) - // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB); - // but is optimized for two cases. - - if (APHI->getNumIncomingValues() <= 8) { // Small # preds case. - unsigned BBIdx = 0; - for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { - // We no longer enter through TIBB, now we come in through NewBB. - // Revector exactly one entry in the PHI node that used to come from - // TIBB to come from NewBB. - PHINode *PN = cast<PHINode>(I); - - // Reuse the previous value of BBIdx if it lines up. 
In cases where we - // have multiple phi nodes with *lots* of predecessors, this is a speed - // win because we don't have to scan the PHI looking for TIBB. This - // happens because the BB list of PHI nodes are usually in the same - // order. - if (PN->getIncomingBlock(BBIdx) != TIBB) - BBIdx = PN->getBasicBlockIndex(TIBB); - PN->setIncomingBlock(BBIdx, NewBB); - } - } else { - // However, the foreach loop is slow for blocks with lots of predecessors - // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk - // the user list of TIBB to find the PHI nodes. - SmallPtrSet<PHINode*, 16> UpdatedPHIs; - - for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); - UI != E; ) { - Value::use_iterator Use = UI++; - if (PHINode *PN = dyn_cast<PHINode>(*Use)) { - // Remove one entry from each PHI. - if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) - PN->setOperand(Use.getOperandNo(), NewBB); - } - } + { + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + // We no longer enter through TIBB, now we come in through NewBB. + // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast<PHINode>(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != TIBB) + BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); } } - + // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). 
if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; - + // Remove an entry for TIBB from DestBB phi nodes. - DestBB->removePredecessor(TIBB); - + DestBB->removePredecessor(TIBB, DontDeleteUselessPhis); + // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } - - + + // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; - + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); - DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); - + // If we have nothing to update, just return. - if (DT == 0 && DF == 0 && LI == 0 && PI == 0) + if (DT == 0 && LI == 0 && PI == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is @@ -280,12 +263,12 @@ I != E; ++I) { BasicBlock *P = *I; if (P != NewBB) - OtherPreds.push_back(P); + OtherPreds.push_back(P); } } bool NewBBDominatesDestBB = true; - + // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); @@ -296,7 +279,7 @@ if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = 0; - + // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); @@ -307,7 +290,7 @@ } OtherPreds.clear(); } - + // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { @@ -317,40 +300,6 @@ } } - // Should we update DominanceFrontier information? - if (DF) { - // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. - if (!OtherPreds.empty()) { - // FIXME: IMPLEMENT THIS! - llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset." 
- " not implemented yet!"); - } - - // Since the new block is dominated by its only predecessor TIBB, - // it cannot be in any block's dominance frontier. If NewBB dominates - // DestBB, its dominance frontier is the same as DestBB's, otherwise it is - // just {DestBB}. - DominanceFrontier::DomSetType NewDFSet; - if (NewBBDominatesDestBB) { - DominanceFrontier::iterator I = DF->find(DestBB); - if (I != DF->end()) { - DF->addBasicBlock(NewBB, I->second); - - if (I->second.count(DestBB)) { - // However NewBB's frontier does not include DestBB. - DominanceFrontier::iterator NF = DF->find(NewBB); - DF->removeFromFrontier(NF, DestBB); - } - } - else - DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType()); - } else { - DominanceFrontier::DomSetType NewDFSet; - NewDFSet.insert(DestBB); - DF->addBasicBlock(NewBB, NewDFSet); - } - } - // Update LoopInfo if it is around. if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { @@ -393,6 +342,8 @@ } // For each unique exit block... + // FIXME: This code is functionally equivalent to the corresponding + // loop in LoopSimplify. SmallVector<BasicBlock *, 4> ExitBlocks; TIL->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { @@ -404,10 +355,15 @@ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; - if (TIL->contains(P)) + if (TIL->contains(P)) { + if (isa<IndirectBrInst>(P->getTerminator())) { + Preds.clear(); + break; + } Preds.push_back(P); - else + } else { HasPredOutsideOfLoop = true; + } } // If there are any preds not in the loop, we'll need to split // the edges. The Preds.empty() check is needed because a block @@ -417,7 +373,7 @@ if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), - P); + "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); }
diff --git a/src/LLVM/lib/Transforms/Utils/BuildLibCalls.cpp b/src/LLVM/lib/Transforms/Utils/BuildLibCalls.cpp new file mode 100644 index 0000000..4b5f45b --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,479 @@ +//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some functions that will create standard C libcalls. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Type.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/LLVMContext.h" +#include "llvm/Intrinsics.h" + +using namespace llvm; + +/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. +Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { + return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr"); +} + +/// EmitStrLen - Emit a call to the strlen function to the builder, for the +/// specified pointer. This always returns an integer value of size intptr_t. 
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + NULL); + CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitStrChr - Emit a call to the strchr function to the builder, for the +/// specified pointer and character. Ptr is required to be some pointer type, +/// and the return value has 'i8*' type. +Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI = + AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + + Type *I8Ptr = B.getInt8PtrTy(); + Type *I32Ty = B.getInt32Ty(); + Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), + I8Ptr, I8Ptr, I32Ty, NULL); + CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), + ConstantInt::get(I32Ty, C), "strchr"); + if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitStrNCmp - Emit a call to the strncmp function to the builder. 
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), + CastToCStr(Ptr2, B), Len, "strncmp"); + + if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the +/// specified pointer arguments. +Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, + const TargetData *TD, StringRef Name) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Type *I8Ptr = B.getInt8PtrTy(); + Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), + I8Ptr, I8Ptr, I8Ptr, NULL); + CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), + Name); + if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the +/// specified pointer arguments. 
+Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, + IRBuilder<> &B, const TargetData *TD, StringRef Name) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Type *I8Ptr = B.getInt8PtrTy(); + Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), + I8Ptr, I8Ptr, I8Ptr, + Len->getType(), NULL); + CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), + Len, "strncpy"); + if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder. +/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src +/// are pointers. +Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI; + AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", + AttrListPtr::get(&AWI, 1), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), NULL); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize); + if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is +/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. 
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val, + Value *Len, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI; + AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + B.getInt32Ty(), + TD->getIntPtrType(Context), + NULL); + CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + + if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitMemCmp - Emit a call to the memcmp function. +Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, + Value *Len, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), + Len, "memcmp"); + + if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. +/// 'floor'). This function is known to take a single argument of type matching +/// 'Op' and returns one value with the same type. If 'Op' is a long double, 'l' +/// is added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. 
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name, + IRBuilder<> &B, const AttrListPtr &Attrs) { + char NameBuffer[20]; + if (!Op->getType()->isDoubleTy()) { + // If we need to add a suffix, copy into NameBuffer. + unsigned NameLen = strlen(Name); + assert(NameLen < sizeof(NameBuffer)-2); + memcpy(NameBuffer, Name, NameLen); + if (Op->getType()->isFloatTy()) + NameBuffer[NameLen] = 'f'; // floorf + else + NameBuffer[NameLen] = 'l'; // floorl + NameBuffer[NameLen+1] = 0; + Name = NameBuffer; + } + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), NULL); + CallInst *CI = B.CreateCall(Callee, Op, Name); + CI->setAttributes(Attrs); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitPutChar - Emit a call to the putchar function. This assumes that Char +/// is an integer. +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), + B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall(PutChar, + B.CreateIntCast(Char, + B.getInt32Ty(), + /*isSigned*/true, + "chari"), + "putchar"); + + if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitPutS - Emit a call to the puts function. This assumes that Str is +/// some pointer. 
+void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + + Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), + B.getInt32Ty(), + B.getInt8PtrTy(), + NULL); + CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); + if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + +} + +/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is +/// an integer and File is a pointer to FILE. +void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), + B.getInt32Ty(), + B.getInt32Ty(), File->getType(), + NULL); + else + F = M->getOrInsertFunction("fputc", + B.getInt32Ty(), + B.getInt32Ty(), + File->getType(), NULL); + Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, + "chari"); + CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} + +/// EmitFPutS - Emit a call to the fputs function. Str is required to be a +/// pointer and File is a pointer to FILE. 
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fputs", B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} + +/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is +/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. +void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(TD->getIntPtrType(Context), 1), File); + + if (const 
Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} + +SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } + +bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { + // We really need TargetData for later. + if (!TD) return false; + + this->CI = CI; + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + IRBuilder<> B(CI); + + if (Name == "__memcpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + replaceCall(CI->getArgOperand(0)); + return true; + } + return false; + } + + // Should be similar to memcpy. + if (Name == "__mempcpy_chk") { + return false; + } + + if (Name == "__memmove_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + replaceCall(CI->getArgOperand(0)); + return true; + } + return false; + } + + if (Name == "__memset_chk") { + // Check if this has the right signature. 
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), + false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + replaceCall(CI->getArgOperand(0)); + return true; + } + return false; + } + + if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(Context)) + return 0; + + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our + // st[rp]cpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, + Name.substr(2, 6)); + replaceCall(Ret); + return true; + } + return false; + } + + if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") { + // Check if this has the right signature. 
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + !FT->getParamType(2)->isIntegerTy() || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, Name.substr(2, 7)); + replaceCall(Ret); + return true; + } + return false; + } + + if (Name == "__strcat_chk") { + return false; + } + + if (Name == "__strncat_chk") { + return false; + } + + return false; +}
diff --git a/src/LLVM/lib/Transforms/Utils/CloneFunction.cpp b/src/LLVM/lib/Transforms/Utils/CloneFunction.cpp new file mode 100644 index 0000000..cf21f1e --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,533 @@ +//===- CloneFunction.cpp - Clone a function into another function ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneFunctionInto interface, which is used as the +// low-level function cloner. This is used by the CloneFunction and function +// inliner to do the dirty work of copying the body of a function around. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/ADT/SmallVector.h" +#include <map> +using namespace llvm; + +// CloneBasicBlock - See comments in Cloning.h +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, + ValueToValueMapTy &VMap, + const Twine &NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo) { + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over. + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[II] = NewInst; // Add instruction map to value. 
+ + hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator()); + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->getEntryBlock(); + } + return NewBB; +} + +// Clone OldFunc into NewFunc, transforming the old arguments into references to +// VMap values. +// +void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); I != E; ++I) + assert(VMap.count(I) && "No mapping from source argument specified!"); +#endif + + // Clone any attributes. + if (NewFunc->arg_size() == OldFunc->arg_size()) + NewFunc->copyAttributesFrom(OldFunc); + else { + //Some arguments were deleted with the VMap. Copy arguments one by one + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); I != E; ++I) + if (Argument* Anew = dyn_cast<Argument>(VMap[I])) + Anew->addAttr( OldFunc->getAttributes() + .getParamAttributes(I->getArgNo() + 1)); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttr(0, OldFunc->getAttributes() + .getRetAttributes())); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttr(~0, OldFunc->getAttributes() + .getFnAttributes())); + + } + + // Loop over all of the basic blocks in the function, cloning them as + // appropriate. Note that we save BE this way in order to handle cloning of + // recursive functions into themselves. 
+ // + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); + VMap[&BB] = CBB; // Add basic block mapping. + + if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) + Returns.push_back(RI); + } + + // Loop over all of the instructions in the function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]), + BE = NewFunc->end(); BB != BE; ++BB) + // Loop over all instructions, fixing each one as we find it... + for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) + RemapInstruction(II, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); +} + +/// CloneFunction - Return a copy of the specified function, but without +/// embedding the function into another module. Also, any references specified +/// in the VMap are changed to refer to their mapped value instead of the +/// original one. If any of the arguments to the function are in the VMap, +/// the arguments are deleted from the resultant function. The VMap is +/// updated to include mappings from all of the instructions and basicblocks in +/// the function from their old to new values. +/// +Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + ClonedCodeInfo *CodeInfo) { + std::vector<Type*> ArgTypes; + + // The user might be deleting arguments to the function by specifying them in + // the VMap. If so, we need to not add the arguments to the arg ty vector + // + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet? + ArgTypes.push_back(I->getType()); + + // Create a new function type... 
+ FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), + ArgTypes, F->getFunctionType()->isVarArg()); + + // Create the new function... + Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName()); + + // Loop over the arguments, copying the names of the mapped arguments over... + Function::arg_iterator DestI = NewF->arg_begin(); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (VMap.count(I) == 0) { // Is this argument preserved? + DestI->setName(I->getName()); // Copy the name over... + VMap[I] = DestI++; // Add mapping to VMap + } + + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo); + return NewF; +} + + + +namespace { + /// PruningFunctionCloner - This class is a private class used to implement + /// the CloneAndPruneFunctionInto method. + struct PruningFunctionCloner { + Function *NewFunc; + const Function *OldFunc; + ValueToValueMapTy &VMap; + bool ModuleLevelChanges; + SmallVectorImpl<ReturnInst*> &Returns; + const char *NameSuffix; + ClonedCodeInfo *CodeInfo; + const TargetData *TD; + public: + PruningFunctionCloner(Function *newFunc, const Function *oldFunc, + ValueToValueMapTy &valueMap, + bool moduleLevelChanges, + SmallVectorImpl<ReturnInst*> &returns, + const char *nameSuffix, + ClonedCodeInfo *codeInfo, + const TargetData *td) + : NewFunc(newFunc), OldFunc(oldFunc), + VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), + Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + } + + /// CloneBlock - The specified block is found to be reachable, clone it and + /// anything that it can reach. + void CloneBlock(const BasicBlock *BB, + std::vector<const BasicBlock*> &ToClone); + + public: + /// ConstantFoldMappedInstruction - Constant fold the specified instruction, + /// mapping its operands through VMap if they are available. 
+ Constant *ConstantFoldMappedInstruction(const Instruction *I); + }; +} + +/// CloneBlock - The specified block is found to be reachable, clone it and +/// anything that it can reach. +void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, + std::vector<const BasicBlock*> &ToClone){ + TrackingVH<Value> &BBEntry = VMap[BB]; + + // Have we already cloned this block? + if (BBEntry) return; + + // Nope, clone it now. + BasicBlock *NewBB; + BBEntry = NewBB = BasicBlock::Create(BB->getContext()); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over, DCE'ing as we go. This + // loop doesn't include the terminator. + for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); + II != IE; ++II) { + // If this instruction constant folds, don't bother cloning the instruction, + // instead, just add the constant to the value map. + if (Constant *C = ConstantFoldMappedInstruction(II)) { + VMap[II] = C; + continue; + } + + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[II] = NewInst; // Add instruction map to value. + + hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + // Finally, clone over the terminator. + const TerminatorInst *OldTI = BB->getTerminator(); + bool TerminatorDone = false; + if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { + if (BI->isConditional()) { + // If the condition was a known constant in the callee... + ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); + // Or is a known constant in the caller... 
+ if (Cond == 0) { + Value *V = VMap[BI->getCondition()]; + Cond = dyn_cast_or_null<ConstantInt>(V); + } + + // Constant fold to uncond branch! + if (Cond) { + BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { + // If switching on a value known constant in the caller. + ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); + if (Cond == 0) { // Or known constant after constant prop in the callee... + Value *V = VMap[SI->getCondition()]; + Cond = dyn_cast_or_null<ConstantInt>(V); + } + if (Cond) { // Constant fold to uncond branch! + BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond)); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + + if (!TerminatorDone) { + Instruction *NewInst = OldTI->clone(); + if (OldTI->hasName()) + NewInst->setName(OldTI->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[OldTI] = NewInst; // Add instruction map to value. + + // Recursively clone any reachable successor blocks. + const TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + ToClone.push_back(TI->getSuccessor(i)); + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI); + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->front(); + } + + if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator())) + Returns.push_back(RI); +} + +/// ConstantFoldMappedInstruction - Constant fold the specified instruction, +/// mapping its operands through VMap if they are available. 
+Constant *PruningFunctionCloner:: +ConstantFoldMappedInstruction(const Instruction *I) { + SmallVector<Constant*, 8> Ops; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), + VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges))) + Ops.push_back(Op); + else + return 0; // All operands not constant! + + if (const CmpInst *CI = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], + TD); + + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(Ops[0], TD); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); +} + +/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, +/// except that it does some simple constant prop and DCE on the fly. The +/// effect of this is to copy significantly less code in cases where (for +/// example) a function call with constant arguments is inlined, and those +/// constant arguments cause a significant amount of code in the callee to be +/// dead. Since this doesn't produce an exact copy of the input, it can't be +/// used for things like CloneFunction or CloneModule. +void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo, + const TargetData *TD, + Instruction *TheCall) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator II = OldFunc->arg_begin(), + E = OldFunc->arg_end(); II != E; ++II) + assert(VMap.count(II) && "No mapping from source argument specified!"); +#endif + + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, + Returns, NameSuffix, CodeInfo, TD); + + // Clone the entry block, and anything recursively reachable from it. 
+ std::vector<const BasicBlock*> CloneWorklist; + CloneWorklist.push_back(&OldFunc->getEntryBlock()); + while (!CloneWorklist.empty()) { + const BasicBlock *BB = CloneWorklist.back(); + CloneWorklist.pop_back(); + PFC.CloneBlock(BB, CloneWorklist); + } + + // Loop over all of the basic blocks in the old function. If the block was + // reachable, we have cloned it and the old block is now in the value map: + // insert it into the new function in the right order. If not, ignore it. + // + // Defer PHI resolution until rest of function is resolved. + SmallVector<const PHINode*, 16> PHIToResolve; + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + Value *V = VMap[BI]; + BasicBlock *NewBB = cast_or_null<BasicBlock>(V); + if (NewBB == 0) continue; // Dead block. + + // Add the new block to the new function. + NewFunc->getBasicBlockList().push_back(NewBB); + + // Loop over all of the instructions in the block, fixing up operand + // references as we go. This uses VMap to do all the hard work. + // + BasicBlock::iterator I = NewBB->begin(); + + DebugLoc TheCallDL; + if (TheCall) + TheCallDL = TheCall->getDebugLoc(); + + // Handle PHI nodes specially, as we have to remove references to dead + // blocks. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + // Skip over all PHI nodes, remembering them for later. + BasicBlock::const_iterator OldI = BI->begin(); + for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) + PHIToResolve.push_back(cast<PHINode>(OldI)); + } + + // Otherwise, remap the rest of the instructions normally. + for (; I != NewBB->end(); ++I) + RemapInstruction(I, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + } + + // Defer PHI resolution until rest of function is resolved, PHI resolution + // requires the CFG to be up-to-date. 
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { + const PHINode *OPN = PHIToResolve[phino]; + unsigned NumPreds = OPN->getNumIncomingValues(); + const BasicBlock *OldBB = OPN->getParent(); + BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); + + // Map operands for blocks that are live and remove operands for blocks + // that are dead. + for (; phino != PHIToResolve.size() && + PHIToResolve[phino]->getParent() == OldBB; ++phino) { + OPN = PHIToResolve[phino]; + PHINode *PN = cast<PHINode>(VMap[OPN]); + for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { + Value *V = VMap[PN->getIncomingBlock(pred)]; + if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { + Value *InVal = MapValue(PN->getIncomingValue(pred), + VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + assert(InVal && "Unknown input value?"); + PN->setIncomingValue(pred, InVal); + PN->setIncomingBlock(pred, MappedBlock); + } else { + PN->removeIncomingValue(pred, false); + --pred, --e; // Revisit the next entry. + } + } + } + + // The loop above has removed PHI entries for those blocks that are dead + // and has updated others. However, if a block is live (i.e. copied over) + // but its terminator has been changed to not go to this block, then our + // phi nodes will have invalid entries. Update the PHI nodes in this + // case. + PHINode *PN = cast<PHINode>(NewBB->begin()); + NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); + if (NumPreds != PN->getNumIncomingValues()) { + assert(NumPreds < PN->getNumIncomingValues()); + // Count how many times each predecessor comes to this block. + std::map<BasicBlock*, unsigned> PredCount; + for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); + PI != E; ++PI) + --PredCount[*PI]; + + // Figure out how many entries to remove from each PHI. 
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + ++PredCount[PN->getIncomingBlock(i)]; + + // At this point, the excess predecessor entries are positive in the + // map. Loop over all of the PHIs and remove excess predecessor + // entries. + BasicBlock::iterator I = NewBB->begin(); + for (; (PN = dyn_cast<PHINode>(I)); ++I) { + for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), + E = PredCount.end(); PCI != E; ++PCI) { + BasicBlock *Pred = PCI->first; + for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) + PN->removeIncomingValue(Pred, false); + } + } + } + + // If the loops above have made these phi nodes have 0 or 1 operand, + // replace them with undef or the input value. We must do this for + // correctness, because 0-operand phis are not valid. + PN = cast<PHINode>(NewBB->begin()); + if (PN->getNumIncomingValues() == 0) { + BasicBlock::iterator I = NewBB->begin(); + BasicBlock::const_iterator OldI = OldBB->begin(); + while ((PN = dyn_cast<PHINode>(I++))) { + Value *NV = UndefValue::get(PN->getType()); + PN->replaceAllUsesWith(NV); + assert(VMap[OldI] == PN && "VMap mismatch"); + VMap[OldI] = NV; + PN->eraseFromParent(); + ++OldI; + } + } + // NOTE: We cannot eliminate single entry phi nodes here, because of + // VMap. Single entry phi nodes can have multiple VMap entries + // pointing at them. Thus, deleting one would require scanning the VMap + // to update any entries in it that would require that. This would be + // really slow. + } + + // Now that the inlined function body has been fully constructed, go through + // and zap unconditional fall-through branches. This happen all the time when + // specializing code: code specialization turns conditional branches into + // uncond branches, and this code folds them. 
+ Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); + while (I != NewFunc->end()) { + BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); + if (!BI || BI->isConditional()) { ++I; continue; } + + // Note that we can't eliminate uncond branches if the destination has + // single-entry PHI nodes. Eliminating the single-entry phi nodes would + // require scanning the VMap to update any entries that point to the phi + // node. + BasicBlock *Dest = BI->getSuccessor(0); + if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { + ++I; continue; + } + + // We know all single-entry PHI nodes in the inlined function have been + // removed, so we just need to splice the blocks. + BI->eraseFromParent(); + + // Make all PHI nodes that referred to Dest now refer to I as their source. + Dest->replaceAllUsesWith(I); + + // Move all the instructions in the succ to the pred. + I->getInstList().splice(I->end(), Dest->getInstList()); + + // Remove the dest block. + Dest->eraseFromParent(); + + // Do not increment I, iteratively merge all things this block branches to. + } +}
diff --git a/src/LLVM/lib/Transforms/Utils/CloneModule.cpp b/src/LLVM/lib/Transforms/Utils/CloneModule.cpp new file mode 100644 index 0000000..a0e027b --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,126 @@ +//===- CloneModule.cpp - Clone an entire module ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneModule interface which makes a copy of an +// entire module. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constant.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +using namespace llvm; + +/// CloneModule - Return an exact copy of the specified module. This is not as +/// easy as it might seem because we have to worry about making copies of global +/// variables and functions, and making their (initializers and references, +/// respectively) refer to the right globals. +/// +Module *llvm::CloneModule(const Module *M) { + // Create the value map that maps things from the old module over to the new + // module. + ValueToValueMapTy VMap; + return CloneModule(M, VMap); +} + +Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { + // First off, we need to create the new module. + Module *New = new Module(M->getModuleIdentifier(), M->getContext()); + New->setDataLayout(M->getDataLayout()); + New->setTargetTriple(M->getTargetTriple()); + New->setModuleInlineAsm(M->getModuleInlineAsm()); + + // Copy all of the dependent libraries over. + for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I) + New->addLibrary(*I); + + // Loop over all of the global variables, making corresponding globals in the + // new module. Here we add them to the VMap and to the new Module. We + // don't worry about attributes or initializers, they will come later. 
+ // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + GlobalVariable *GV = new GlobalVariable(*New, + I->getType()->getElementType(), + I->isConstant(), I->getLinkage(), + (Constant*) 0, I->getName(), + (GlobalVariable*) 0, + I->isThreadLocal(), + I->getType()->getAddressSpace()); + GV->copyAttributesFrom(I); + VMap[I] = GV; + } + + // Loop over the functions in the module, making external functions as before + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + Function *NF = + Function::Create(cast<FunctionType>(I->getType()->getElementType()), + I->getLinkage(), I->getName(), New); + NF->copyAttributesFrom(I); + VMap[I] = NF; + } + + // Loop over the aliases in the module + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) { + GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(), + I->getName(), NULL, New); + GA->copyAttributesFrom(I); + VMap[I] = GA; + } + + // Now that all of the things that global variable initializer can refer to + // have been created, loop through and copy the global variable referrers + // over... We also set the attributes on the global now. + // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); + if (I->hasInitializer()) + GV->setInitializer(MapValue(I->getInitializer(), VMap)); + } + + // Similarly, copy over function bodies now... + // + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + Function *F = cast<Function>(VMap[I]); + if (!I->isDeclaration()) { + Function::arg_iterator DestI = F->arg_begin(); + for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); + ++J) { + DestI->setName(J->getName()); + VMap[J] = DestI++; + } + + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. 
+ CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); + } + } + + // And aliases + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) { + GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); + if (const Constant *C = I->getAliasee()) + GA->setAliasee(MapValue(C, VMap)); + } + + // And named metadata.... + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) { + const NamedMDNode &NMD = *I; + NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap)); + } + + return New; +}
diff --git a/src/LLVM/lib/Transforms/Utils/CodeExtractor.cpp b/src/LLVM/lib/Transforms/Utils/CodeExtractor.cpp new file mode 100644 index 0000000..5f47ebb --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,796 @@ +//===- CodeExtractor.cpp - Pull code region into a new function -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface to tear out a code region, such as an +// individual loop or a parallel section, into a new function, replacing it with +// a call to the new function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <set> +using namespace llvm; + +// Provide a command-line option to aggregate function arguments into a struct +// for functions produced by the code extractor. This is useful when converting +// extracted functions to pthread-based code, as only one argument (void*) can +// be passed in to pthread_create(). 
+static cl::opt<bool> +AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, + cl::desc("Aggregate arguments to code-extracted functions")); + +namespace { + class CodeExtractor { + typedef SetVector<Value*> Values; + SetVector<BasicBlock*> BlocksToExtract; + DominatorTree* DT; + bool AggregateArgs; + unsigned NumExitBlocks; + Type *RetTy; + public: + CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false) + : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {} + + Function *ExtractCodeRegion(ArrayRef<BasicBlock*> code); + + bool isEligible(ArrayRef<BasicBlock*> code); + + private: + /// definedInRegion - Return true if the specified value is defined in the + /// extracted region. + bool definedInRegion(Value *V) const { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (BlocksToExtract.count(I->getParent())) + return true; + return false; + } + + /// definedInCaller - Return true if the specified value is defined in the + /// function being code extracted, but not in the region being extracted. + /// These values must be passed in as live-ins to the function. 
+ bool definedInCaller(Value *V) const { + if (isa<Argument>(V)) return true; + if (Instruction *I = dyn_cast<Instruction>(V)) + if (!BlocksToExtract.count(I->getParent())) + return true; + return false; + } + + void severSplitPHINodes(BasicBlock *&Header); + void splitReturnBlocks(); + void findInputsOutputs(Values &inputs, Values &outputs); + + Function *constructFunction(const Values &inputs, + const Values &outputs, + BasicBlock *header, + BasicBlock *newRootNode, BasicBlock *newHeader, + Function *oldFunction, Module *M); + + void moveCodeToFunction(Function *newFunction); + + void emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *newHeader, + Values &inputs, + Values &outputs); + + }; +} + +/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the +/// region, we need to split the entry block of the region so that the PHI node +/// is easier to deal with. +void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { + unsigned NumPredsFromRegion = 0; + unsigned NumPredsOutsideRegion = 0; + + if (Header != &Header->getParent()->getEntryBlock()) { + PHINode *PN = dyn_cast<PHINode>(Header->begin()); + if (!PN) return; // No PHI nodes. + + // If the header node contains any PHI nodes, check to see if there is more + // than one entry from outside the region. If so, we need to sever the + // header block into two. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (BlocksToExtract.count(PN->getIncomingBlock(i))) + ++NumPredsFromRegion; + else + ++NumPredsOutsideRegion; + + // If there is one (or fewer) predecessor from outside the region, we don't + // need to do anything special. + if (NumPredsOutsideRegion <= 1) return; + } + + // Otherwise, we need to split the header block into two pieces: one + // containing PHI nodes merging values from outside of the region, and a + // second that contains all of the code for the block and merges back any + // incoming values from inside of the region. 
+ BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); + BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, + Header->getName()+".ce"); + + // We only want to code extract the second block now, and it becomes the new + // header of the region. + BasicBlock *OldPred = Header; + BlocksToExtract.remove(OldPred); + BlocksToExtract.insert(NewBB); + Header = NewBB; + + // Okay, update dominator sets. The blocks that dominate the new one are the + // blocks that dominate TIBB plus the new block itself. + if (DT) + DT->splitBlock(NewBB); + + // Okay, now we need to adjust the PHI nodes and any branches from within the + // region to go to the new header block instead of the old header block. + if (NumPredsFromRegion) { + PHINode *PN = cast<PHINode>(OldPred->begin()); + // Loop over all of the predecessors of OldPred that are in the region, + // changing them to branch to NewBB instead. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (BlocksToExtract.count(PN->getIncomingBlock(i))) { + TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); + TI->replaceUsesOfWith(OldPred, NewBB); + } + + // Okay, everything within the region is now branching to the right block, we + // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { + PHINode *PN = cast<PHINode>(AfterPHIs); + // Create a new PHI node in the new region, which has an incoming value + // from OldPred of PN. + PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, + PN->getName()+".ce", NewBB->begin()); + NewPN->addIncoming(PN, OldPred); + + // Loop over all of the incoming value in PN, moving them to NewPN if they + // are from the extracted region. 
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { + if (BlocksToExtract.count(PN->getIncomingBlock(i))) { + NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); + PN->removeIncomingValue(i); + --i; + } + } + } + } +} + +void CodeExtractor::splitReturnBlocks() { + for (SetVector<BasicBlock*>::iterator I = BlocksToExtract.begin(), + E = BlocksToExtract.end(); I != E; ++I) + if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) { + BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); + if (DT) { + // Old dominates New. New node dominates all other nodes dominated + // by Old. + DomTreeNode *OldNode = DT->getNode(*I); + SmallVector<DomTreeNode*, 8> Children; + for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end(); + DI != DE; ++DI) + Children.push_back(*DI); + + DomTreeNode *NewNode = DT->addNewBlock(New, *I); + + for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) + DT->changeImmediateDominator(*I, NewNode); + } + } +} + +// findInputsOutputs - Find inputs to, outputs from the code region. +// +void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) { + std::set<BasicBlock*> ExitBlocks; + for (SetVector<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(), + ce = BlocksToExtract.end(); ci != ce; ++ci) { + BasicBlock *BB = *ci; + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // If a used value is defined outside the region, it's an input. If an + // instruction is used outside the region, it's an output. + for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O) + if (definedInCaller(*O)) + inputs.insert(*O); + + // Consider uses of this instruction (outputs). + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) + if (!definedInRegion(*UI)) { + outputs.insert(I); + break; + } + } // for: insts + + // Keep track of the exit blocks from the region. 
+ TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (!BlocksToExtract.count(TI->getSuccessor(i))) + ExitBlocks.insert(TI->getSuccessor(i)); + } // for: basic blocks + + NumExitBlocks = ExitBlocks.size(); +} + +/// constructFunction - make a function based on inputs and outputs, as follows: +/// f(in0, ..., inN, out0, ..., outN) +/// +Function *CodeExtractor::constructFunction(const Values &inputs, + const Values &outputs, + BasicBlock *header, + BasicBlock *newRootNode, + BasicBlock *newHeader, + Function *oldFunction, + Module *M) { + DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + + // This function returns unsigned, outputs will go back by reference. + switch (NumExitBlocks) { + case 0: + case 1: RetTy = Type::getVoidTy(header->getContext()); break; + case 2: RetTy = Type::getInt1Ty(header->getContext()); break; + default: RetTy = Type::getInt16Ty(header->getContext()); break; + } + + std::vector<Type*> paramTy; + + // Add the types of the input values to the function's argument list + for (Values::const_iterator i = inputs.begin(), + e = inputs.end(); i != e; ++i) { + const Value *value = *i; + DEBUG(dbgs() << "value used in func: " << *value << "\n"); + paramTy.push_back(value->getType()); + } + + // Add the types of the output values to the function's argument list. 
+ for (Values::const_iterator I = outputs.begin(), E = outputs.end(); + I != E; ++I) { + DEBUG(dbgs() << "instr used in func: " << **I << "\n"); + if (AggregateArgs) + paramTy.push_back((*I)->getType()); + else + paramTy.push_back(PointerType::getUnqual((*I)->getType())); + } + + DEBUG(dbgs() << "Function type: " << *RetTy << " f("); + for (std::vector<Type*>::iterator i = paramTy.begin(), + e = paramTy.end(); i != e; ++i) + DEBUG(dbgs() << **i << ", "); + DEBUG(dbgs() << ")\n"); + + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + PointerType *StructPtr = + PointerType::getUnqual(StructType::get(M->getContext(), paramTy)); + paramTy.clear(); + paramTy.push_back(StructPtr); + } + FunctionType *funcType = + FunctionType::get(RetTy, paramTy, false); + + // Create the new function + Function *newFunction = Function::Create(funcType, + GlobalValue::InternalLinkage, + oldFunction->getName() + "_" + + header->getName(), M); + // If the old function is no-throw, so is the new one. + if (oldFunction->doesNotThrow()) + newFunction->setDoesNotThrow(true); + + newFunction->getBasicBlockList().push_back(newRootNode); + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. 
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); + TerminatorInst *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = AI++; + + std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end()); + for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end(); + use != useE; ++use) + if (Instruction* inst = dyn_cast<Instruction>(*use)) + if (BlocksToExtract.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + + // Set names for input and output arguments. + if (!AggregateArgs) { + AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName()+".out"); + } + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. 
+ std::vector<User*> Users(header->use_begin(), header->use_end()); + for (unsigned i = 0, e = Users.size(); i != e; ++i) + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i])) + if (!BlocksToExtract.count(TI->getParent()) && + TI->getParent()->getParent() == oldFunction) + TI->replaceUsesOfWith(header, newHeader); + + return newFunction; +} + +/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI +/// that uses the value within the basic block, and return the predecessor +/// block associated with that use, or return 0 if none is found. +static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { + for (Value::use_iterator UI = Used->use_begin(), + UE = Used->use_end(); UI != UE; ++UI) { + PHINode *P = dyn_cast<PHINode>(*UI); + if (P && P->getParent() == BB) + return P->getIncomingBlock(UI); + } + + return 0; +} + +/// emitCallAndSwitchStatement - This method sets up the caller side by adding +/// the call instruction, splitting any PHI nodes in the header block as +/// necessary. 
+void CodeExtractor:: +emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, + Values &inputs, Values &outputs) { + // Emit a call to the new function, passing in: *pointer to struct (if + // aggregating parameters), or plain inputs and allocated memory for outputs + std::vector<Value*> params, StructValues, ReloadOutputs, Reloads; + + LLVMContext &Context = newFunction->getContext(); + + // Add inputs as params, or to be filled into the struct + for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) + if (AggregateArgs) + StructValues.push_back(*i); + else + params.push_back(*i); + + // Create allocas for the outputs + for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) { + if (AggregateArgs) { + StructValues.push_back(*i); + } else { + AllocaInst *alloca = + new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc", + codeReplacer->getParent()->begin()->begin()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } + + AllocaInst *Struct = 0; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + std::vector<Type*> ArgTypes; + for (Values::iterator v = StructValues.begin(), + ve = StructValues.end(); v != ve; ++v) + ArgTypes.push_back((*v)->getType()); + + // Allocate a struct at the beginning of this function + Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + Struct = + new AllocaInst(StructArgTy, 0, "structArg", + codeReplacer->getParent()->begin()->begin()); + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(Struct, Idx, + "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + StoreInst *SI = new StoreInst(StructValues[i], GEP); + codeReplacer->getInstList().push_back(SI); + } + } + + // 
Emit the call to the function + CallInst *call = CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : ""); + codeReplacer->getInstList().push_back(call); + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + // Reload the outputs passed in by reference + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = 0; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP + = GetElementPtrInst::Create(Struct, Idx, + "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); + Reloads.push_back(load); + codeReplacer->getInstList().push_back(load); + std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast<Instruction>(Users[u]); + if (!BlocksToExtract.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. 
+ std::map<BasicBlock*, BasicBlock*> ExitBlockMap; + + unsigned switchVal = 0; + for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(), + e = BlocksToExtract.end(); i != e; ++i) { + TerminatorInst *TI = (*i)->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (!BlocksToExtract.count(TI->getSuccessor(i))) { + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + if (!NewTarget) { + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", + newFunction); + unsigned SuccNum = switchVal++; + + Value *brVal = 0; + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget); + + // Update the switch instruction. + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), + OldTarget); + + // Restore values just before we exit + Function::arg_iterator OAI = OutputArgBegin; + for (unsigned out = 0, e = outputs.size(); out != e; ++out) { + // For an invoke, the normal destination is the only one that is + // dominated by the result of the invocation + BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent(); + + bool DominatesDef = true; + + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) { + DefBlock = Invoke->getNormalDest(); + + // Make sure we are looking at the original successor block, not + // at a newly inserted exit block, which won't be in the dominator + // info. 
+ for (std::map<BasicBlock*, BasicBlock*>::iterator I = + ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I) + if (DefBlock == I->second) { + DefBlock = I->first; + break; + } + + // In the extract block case, if the block we are extracting ends + // with an invoke instruction, make sure that we don't emit a + // store of the invoke value for the unwind block. + if (!DT && DefBlock != OldTarget) + DominatesDef = false; + } + + if (DT) { + DominatesDef = DT->dominates(DefBlock, OldTarget); + + // If the output value is used by a phi in the target block, + // then we need to test for dominance of the phi's predecessor + // instead. Unfortunately, this is a little complicated since we + // have already rewritten uses of the value to uses of the reload. + BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], + OldTarget); + if (pred && DT && DT->dominates(DefBlock, pred)) + DominatesDef = true; + } + + if (DominatesDef) { + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), + FirstOut+out); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(OAI, Idx, + "gep_" + outputs[out]->getName(), + NTRet); + new StoreInst(outputs[out], GEP, NTRet); + } else { + new StoreInst(outputs[out], OAI, NTRet); + } + } + // Advance output iterator even if we don't emit a store + if (!AggregateArgs) ++OAI; + } + } + + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } + } + + // Now that we've done the deed, simplify the switch instruction. 
+ Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, 0, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, + Constant::getNullValue(OldFnRetTy), TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. 
+ TheSwitch->setOperand(0, call); + TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks)); + TheSwitch->removeCase(NumExitBlocks); // Remove redundant case + break; + } +} + +void CodeExtractor::moveCodeToFunction(Function *newFunction) { + Function *oldFunc = (*BlocksToExtract.begin())->getParent(); + Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); + Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); + + for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(), + e = BlocksToExtract.end(); i != e; ++i) { + // Delete the basic block from the old function, and the list of blocks + oldBlocks.remove(*i); + + // Insert this basic block into the new function + newBlocks.push_back(*i); + } +} + +/// ExtractRegion - Removes a loop from a function, replaces it with a call to +/// new function. Returns pointer to the new function. +/// +/// algorithm: +/// +/// find inputs and outputs for the region +/// +/// for inputs: add to function as args, map input instr* to arg# +/// for outputs: add allocas for scalars, +/// add to func as args, map output instr* to arg# +/// +/// rewrite func to use argument #s instead of instr* +/// +/// for each scalar output in the function: at every exit, store intermediate +/// computed result back into memory. +/// +Function *CodeExtractor:: +ExtractCodeRegion(ArrayRef<BasicBlock*> code) { + if (!isEligible(code)) + return 0; + + // 1) Find inputs, outputs + // 2) Construct new function + // * Add allocas for defs, pass as args by reference + // * Pass in uses as args + // 3) Move code region, add call instr to func + // + BlocksToExtract.insert(code.begin(), code.end()); + + Values inputs, outputs; + + // Assumption: this is a single-entry code region, and the header is the first + // block in the region. 
+ BasicBlock *header = code[0]; + + for (unsigned i = 1, e = code.size(); i != e; ++i) + for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]); + PI != E; ++PI) + assert(BlocksToExtract.count(*PI) && + "No blocks in this region may have entries from outside the region" + " except for the first block!"); + + // If we have to split PHI nodes or the entry block, do so now. + severSplitPHINodes(header); + + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + Function *oldFunction = header->getParent(); + + // This takes place of the original loop + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), + "codeRepl", oldFunction, + header); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), + "newFuncRoot"); + newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + + // Find inputs to, outputs from the code region. + findInputsOutputs(inputs, outputs); + + // Construct new function based on inputs/outputs & add allocas for all defs. + Function *newFunction = constructFunction(inputs, outputs, header, + newFuncRoot, + codeReplacer, oldFunction, + oldFunction->getParent()); + + emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); + + moveCodeToFunction(newFunction); + + // Loop over all of the PHI nodes in the header block, and change any + // references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!BlocksToExtract.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + + // Look at all successors of the codeReplacer block. 
If any of these blocks + // had PHI nodes in them, we need to update the "from" block to be the code + // replacer, not the original block in the extracted region. + std::vector<BasicBlock*> Succs(succ_begin(codeReplacer), + succ_end(codeReplacer)); + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + std::set<BasicBlock*> ProcessedPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (BlocksToExtract.count(PN->getIncomingBlock(i))) { + if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second) + PN->setIncomingBlock(i, codeReplacer); + else { + // There were multiple entries in the PHI for this block, now there + // is only one, so remove the duplicated entries. + PN->removeIncomingValue(i, false); + --i; --e; + } + } + } + + //cerr << "NEW FUNCTION: " << *newFunction; + // verifyFunction(*newFunction); + + // cerr << "OLD FUNCTION: " << *oldFunction; + // verifyFunction(*oldFunction); + + DEBUG(if (verifyFunction(*newFunction)) + report_fatal_error("verifyFunction failed!")); + return newFunction; +} + +bool CodeExtractor::isEligible(ArrayRef<BasicBlock*> code) { + // Deny a single basic block that's a landing pad block. + if (code.size() == 1 && code[0]->isLandingPad()) + return false; + + // Deny code region if it contains allocas or vastarts. + for (ArrayRef<BasicBlock*>::iterator BB = code.begin(), e=code.end(); + BB != e; ++BB) + for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end(); + I != Ie; ++I) + if (isa<AllocaInst>(*I)) + return false; + else if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (const Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::vastart) + return false; + return true; +} + + +/// ExtractCodeRegion - Slurp a sequence of basic blocks into a brand new +/// function. 
+/// +Function* llvm::ExtractCodeRegion(DominatorTree &DT, + ArrayRef<BasicBlock*> code, + bool AggregateArgs) { + return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code); +} + +/// ExtractLoop - Slurp a natural loop into a brand new function. +/// +Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) { + return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks()); +} + +/// ExtractBasicBlock - Slurp a basic block into a brand new function. +/// +Function* llvm::ExtractBasicBlock(ArrayRef<BasicBlock*> BBs, bool AggregateArgs){ + return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(BBs); +}
diff --git a/src/LLVM/lib/Transforms/Utils/DemoteRegToStack.cpp b/src/LLVM/lib/Transforms/Utils/DemoteRegToStack.cpp index f2a2772..8cc2649 100644 --- a/src/LLVM/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/src/LLVM/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -40,10 +40,10 @@ AllocaInst *Slot; if (AllocaPoint) { Slot = new AllocaInst(I.getType(), 0, - AllocaPoint); + I.getName()+".reg2mem", AllocaPoint); } else { Function *F = I.getParent()->getParent(); - Slot = new AllocaInst(I.getType(), 0, + Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", F->getEntryBlock().begin()); } @@ -67,7 +67,7 @@ Value *&V = Loads[PN->getIncomingBlock(i)]; if (V == 0) { // Insert the load into the predecessor block - V = new LoadInst(Slot, VolatileLoads, + V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, PN->getIncomingBlock(i)->getTerminator()); } PN->setIncomingValue(i, V); @@ -75,15 +75,27 @@ } else { // If this is a normal instruction, just insert a load. - Value *V = new LoadInst(Slot, VolatileLoads, U); + Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U); U->replaceUsesOfWith(&I, V); } } - // Insert stores of the computed value into the stack slot. - BasicBlock::iterator InsertPt = &I; - ++InsertPt; + // Insert stores of the computed value into the stack slot. We have to be + // careful if I is an invoke instruction though, because we can't insert the + // store AFTER the terminator instruction. + BasicBlock::iterator InsertPt; + if (!isa<TerminatorInst>(I)) { + InsertPt = &I; + ++InsertPt; + } else { + // We cannot demote invoke instructions to the stack if their normal edge + // is critical. + InvokeInst &II = cast<InvokeInst>(I); + assert(II.getNormalDest()->getSinglePredecessor() && + "Cannot demote invoke with a critical successor!"); + InsertPt = II.getNormalDest()->begin(); + } for (; isa<PHINode>(InsertPt); ++InsertPt) /* empty */; // Don't insert before any PHI nodes. 
@@ -106,21 +118,25 @@ AllocaInst *Slot; if (AllocaPoint) { Slot = new AllocaInst(P->getType(), 0, - AllocaPoint); + P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), 0, + Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", F->getEntryBlock().begin()); } // Iterate over each operand, insert store in each predecessor. for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { + if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) { + assert(II->getParent() != P->getIncomingBlock(i) && + "Invoke edge not supported yet"); (void)II; + } new StoreInst(P->getIncomingValue(i), Slot, P->getIncomingBlock(i)->getTerminator()); } // Insert load in place of the phi and replace all uses. - Value *V = new LoadInst(Slot, P); + Value *V = new LoadInst(Slot, P->getName()+".reload", P); P->replaceAllUsesWith(V); // Delete phi.
diff --git a/src/LLVM/lib/Transforms/Utils/INSTALL.vcxproj b/src/LLVM/lib/Transforms/Utils/INSTALL.vcxproj new file mode 100644 index 0000000..569e19a --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/INSTALL.vcxproj
@@ -0,0 +1,261 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{048BB775-7681-4EE1-AACF-5A067ACEEEA5}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>INSTALL</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + 
<OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -DBUILD_TYPE=$(Configuration) -P cmake_install.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\INSTALL_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/INSTALL_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\INSTALL_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Utils/INSTALL.vcxproj.filters b/src/LLVM/lib/Transforms/Utils/INSTALL.vcxproj.filters new file mode 100644 index 0000000..251dd1d --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/INSTALL.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\INSTALL_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Utils/InlineFunction.cpp b/src/LLVM/lib/Transforms/Utils/InlineFunction.cpp new file mode 100644 index 0000000..5464dbc --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,1318 @@ +//===- InlineFunction.cpp - Code to perform function inlining -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inlining of a function into a call site, resolving +// parameters and the return value as appropriate. +// +// The code in this file for handling inlines through invoke +// instructions preserves semantics only under some assumptions about +// the behavior of unwinders which correspond to gcc-style libUnwind +// exception personality functions. Eventually the IR will be +// improved to make this unnecessary, but until then, this code is +// marked [LIBUNWIND]. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" +#include "llvm/Attributes.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/IRBuilder.h" +using namespace llvm; + +bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) { + return InlineFunction(CallSite(CI), IFI); // wrap CI in a CallSite and delegate +} +bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) { + return InlineFunction(CallSite(II), IFI); // wrap II in a CallSite and delegate +} + +// FIXME: New EH - Remove the functions marked [LIBUNWIND] when new EH is +// turned on. + +/// [LIBUNWIND] Find the first llvm.eh.exception call in the given block, if any. 
+static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) { + for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) { + EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i); + if (exn) return exn; + } + + return 0; +} + +/// [LIBUNWIND] Look for the 'best' llvm.eh.selector instruction for +/// the given llvm.eh.exception call (null if it has no selector use). +static EHSelectorInst *findSelectorForException(EHExceptionInst *exn) { + BasicBlock *exnBlock = exn->getParent(); + + EHSelectorInst *outOfBlockSelector = 0; + for (Instruction::use_iterator + ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) { + EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui); + if (!sel) continue; + + // Immediately accept an eh.selector in the same block as the + // exception call. + if (sel->getParent() == exnBlock) return sel; + + // Otherwise, use the first selector we see. + if (!outOfBlockSelector) outOfBlockSelector = sel; + } + + return outOfBlockSelector; +} + +/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector +/// in the given landing pad. In principle, llvm.eh.exception is +/// required to be in the landing pad; in practice, SplitCriticalEdge +/// can break that invariant, and then inlining can break it further. +/// There's a real need for a reliable solution here, but until that +/// happens, we have some fragile workarounds here. +static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) { + // Look for an exception call in the actual landing pad. + EHExceptionInst *exn = findExceptionInBlock(lpad); + if (exn) return findSelectorForException(exn); + + // Okay, if that failed, look for one in an obvious successor. If + // we find one, we'll fix the IR by moving things back to the + // landing pad. 
+ + bool dominates = true; // does the lpad dominate the exn call + BasicBlock *nonDominated = 0; // if not, the first non-dominated block + BasicBlock *lastDominated = 0; // and the block which branched to it + + BasicBlock *exnBlock = lpad; + + // We need to protect against lpads that lead into infinite loops. + SmallPtrSet<BasicBlock*,4> visited; + visited.insert(exnBlock); + + do { + // We're not going to apply this hack to anything more complicated + // than a series of unconditional branches, so if the block + // doesn't terminate in an unconditional branch, just fail. More + // complicated cases can arise when, say, sinking a call into a + // split unwind edge and then inlining it; but that can do almost + // *anything* to the CFG, including leaving the selector + // completely unreachable. The only way to fix that properly is + // to (1) prohibit transforms which move the exception or selector + // values away from the landing pad, e.g. by producing them with + // instructions that are pinned to an edge like a phi, or + // producing them with not-really-instructions, and (2) making + // transforms which split edges deal with that. + BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back()); + if (!branch || branch->isConditional()) return 0; + + BasicBlock *successor = branch->getSuccessor(0); + + // Fail if we found an infinite loop. + if (!visited.insert(successor)) return 0; + + // If the successor isn't dominated by exnBlock: + if (!successor->getSinglePredecessor()) { + // We don't want to have to deal with threading the exception + // through multiple levels of phi, so give up if we've already + // followed a non-dominating edge. + if (!dominates) return 0; + + // Otherwise, remember this as a non-dominating edge. + dominates = false; + nonDominated = successor; + lastDominated = exnBlock; + } + + exnBlock = successor; + + // Can we stop here? 
+ exn = findExceptionInBlock(exnBlock); + } while (!exn); + + // Look for a selector call for the exception we found. + EHSelectorInst *selector = findSelectorForException(exn); + if (!selector) return 0; + + // The easy case is when the landing pad still dominates the + // exception call, in which case we can just move both calls back to + // the landing pad. + if (dominates) { + selector->moveBefore(lpad->getFirstNonPHI()); + exn->moveBefore(selector); + return selector; + } + + // Otherwise, we have to split at the first non-dominating block. + // The CFG looks basically like this: + // lpad: + // phis_0 + // insnsAndBranches_1 + // br label %nonDominated + // nonDominated: + // phis_2 + // insns_3 + // %exn = call i8* @llvm.eh.exception() + // insnsAndBranches_4 + // %selector = call @llvm.eh.selector(i8* %exn, ... + // We need to turn this into: + // lpad: + // phis_0 + // %exn0 = call i8* @llvm.eh.exception() + // %selector0 = call @llvm.eh.selector(i8* %exn0, ... + // insnsAndBranches_1 + // br label %split // from lastDominated + // nonDominated: + // phis_2 (without edge from lastDominated) + // %exn1 = call i8* @llvm.eh.exception() + // %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ... + // br label %split + // split: + // phis_2 (edge from lastDominated, edge from nonDominated) + // %exn = phi ... + // %selector = phi ... + // insns_3 + // insnsAndBranches_4 + + assert(nonDominated); + assert(lastDominated); + + // First, make clones of the intrinsics to go in lpad. + EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone()); + EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone()); + lpadSelector->setArgOperand(0, lpadExn); + lpadSelector->insertBefore(lpad->getFirstNonPHI()); + lpadExn->insertBefore(lpadSelector); + + // Split the non-dominated block. + BasicBlock *split = + nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(), + nonDominated->getName() + ".lpad-fix"); + + // Redirect the last dominated branch there. 
+ cast<BranchInst>(lastDominated->back()).setSuccessor(0, split); + + // Move the existing intrinsics to the end of the old block. + selector->moveBefore(&nonDominated->back()); + exn->moveBefore(selector); + + Instruction *splitIP = &split->front(); + + // For all the phis in nonDominated, make a new phi in split to join + // that phi with the edge from lastDominated. + for (BasicBlock::iterator + i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) { + PHINode *phi = dyn_cast<PHINode>(i); + if (!phi) break; + + PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(), + splitIP); + phi->replaceAllUsesWith(splitPhi); + splitPhi->addIncoming(phi, nonDominated); + splitPhi->addIncoming(phi->removeIncomingValue(lastDominated), + lastDominated); + } + + // Make new phis for the exception and selector. + PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP); + exn->replaceAllUsesWith(exnPhi); + selector->setArgOperand(0, exn); // except for this use + exnPhi->addIncoming(exn, nonDominated); + exnPhi->addIncoming(lpadExn, lastDominated); + + PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP); + selector->replaceAllUsesWith(selectorPhi); + selectorPhi->addIncoming(selector, nonDominated); + selectorPhi->addIncoming(lpadSelector, lastDominated); + + return lpadSelector; +} + +namespace { + /// A class for recording information about inlining through an invoke. + class InvokeInliningInfo { + BasicBlock *OuterUnwindDest; + EHSelectorInst *OuterSelector; + BasicBlock *InnerUnwindDest; + PHINode *InnerExceptionPHI; + PHINode *InnerSelectorPHI; + SmallVector<Value*, 8> UnwindDestPHIValues; + + // FIXME: New EH - These will replace the analogous ones above. + BasicBlock *OuterResumeDest; //< Destination of the invoke's unwind. + BasicBlock *InnerResumeDest; //< Destination for the callee's resume. + LandingPadInst *CallerLPad; //< LandingPadInst associated with the invoke. 
+ PHINode *InnerEHValuesPHI; //< PHI for EH values from landingpad insts. + + public: + InvokeInliningInfo(InvokeInst *II) + : OuterUnwindDest(II->getUnwindDest()), OuterSelector(0), + InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0), + OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), + CallerLPad(0), InnerEHValuesPHI(0) { + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing + // the edge from this block. + llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock::iterator I = OuterUnwindDest->begin(); + for (; isa<PHINode>(I); ++I) { + // Save the value to use for this edge. + PHINode *PHI = cast<PHINode>(I); + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + // FIXME: With the new EH, this if/dyn_cast should be a 'cast'. + if (LandingPadInst *LPI = dyn_cast<LandingPadInst>(I)) { + CallerLPad = LPI; + } + } + + /// The outer unwind destination is the target of unwind edges + /// introduced for calls within the inlined function. + BasicBlock *getOuterUnwindDest() const { + return OuterUnwindDest; + } + + EHSelectorInst *getOuterSelector() { // looked up lazily; cached once found + if (!OuterSelector) + OuterSelector = findSelectorForLandingPad(OuterUnwindDest); + return OuterSelector; + } + + BasicBlock *getInnerUnwindDest(); + + // FIXME: New EH - Rename when new EH is turned on. + BasicBlock *getInnerUnwindDestNewEH(); + + LandingPadInst *getLandingPadInst() const { return CallerLPad; } + + bool forwardEHResume(CallInst *call, BasicBlock *src); + + /// forwardResume - Forward the 'resume' instruction to the caller's landing + /// pad block. When the landing pad block has only one predecessor, this is + /// a simple branch. When there is more than one predecessor, we need to + /// split the landing pad block after the landingpad instruction and jump + /// to there. 
+ void forwardResume(ResumeInst *RI); + + /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind + /// destination block for the given basic block, using the values for the + /// original invoke's source block. + void addIncomingPHIValuesFor(BasicBlock *BB) const { + addIncomingPHIValuesForInto(BB, OuterUnwindDest); + } + + void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { + BasicBlock::iterator I = dest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *phi = cast<PHINode>(I); + phi->addIncoming(UnwindDestPHIValues[i], src); + } + } + }; +} + +/// [LIBUNWIND] Get or create a target for the branch out of rewritten calls to +/// llvm.eh.resume. +BasicBlock *InvokeInliningInfo::getInnerUnwindDest() { + if (InnerUnwindDest) return InnerUnwindDest; + + // Find and hoist the llvm.eh.exception and llvm.eh.selector calls + // in the outer landing pad to immediately following the phis. + EHSelectorInst *selector = getOuterSelector(); + if (!selector) return 0; + + // The call to llvm.eh.exception *must* be in the landing pad. + Instruction *exn = cast<Instruction>(selector->getArgOperand(0)); + assert(exn->getParent() == OuterUnwindDest); + + // TODO: recognize when we've already done this, so that we don't + // get a linear number of these when inlining calls into lots of + // invokes with the same landing pad. + + // Do the hoisting. + Instruction *splitPoint = exn->getParent()->getFirstNonPHI(); + assert(splitPoint != selector && "selector-on-exception dominance broken!"); + if (splitPoint == exn) { + selector->removeFromParent(); + selector->insertAfter(exn); + splitPoint = selector->getNextNode(); + } else { + exn->moveBefore(splitPoint); + selector->moveBefore(splitPoint); + } + + // Split the landing pad. + InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint, + OuterUnwindDest->getName() + ".body"); + + // The number of incoming edges we expect to the inner landing pad. 
+ const unsigned phiCapacity = 2; + + // Create corresponding new phis for all the phis in the outer landing pad. + BasicBlock::iterator insertPoint = InnerUnwindDest->begin(); + BasicBlock::iterator I = OuterUnwindDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *outerPhi = cast<PHINode>(I); + PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity, + outerPhi->getName() + ".lpad-body", + insertPoint); + outerPhi->replaceAllUsesWith(innerPhi); + innerPhi->addIncoming(outerPhi, OuterUnwindDest); + } + + // Create a phi for the exception value... + InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity, + "exn.lpad-body", insertPoint); + exn->replaceAllUsesWith(InnerExceptionPHI); + selector->setArgOperand(0, exn); // restore this use + InnerExceptionPHI->addIncoming(exn, OuterUnwindDest); + + // ...and the selector. + InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity, + "selector.lpad-body", insertPoint); + selector->replaceAllUsesWith(InnerSelectorPHI); + InnerSelectorPHI->addIncoming(selector, OuterUnwindDest); + + // All done. + return InnerUnwindDest; +} + +/// [LIBUNWIND] Try to forward the given call, which logically occurs +/// at the end of the given block, as a branch to the inner unwind +/// block. Returns true if the call was forwarded. +bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) { + // First, check whether this is a call to the intrinsic. + Function *fn = dyn_cast<Function>(call->getCalledValue()); + if (!fn || fn->getName() != "llvm.eh.resume") + return false; + + // At this point, we need to return true on all paths, because + // otherwise we'll construct an invoke of the intrinsic, which is + // not well-formed. + + // Try to find or make an inner unwind dest, which will fail if we + // can't find a selector call for the outer unwind dest. 
+ BasicBlock *dest = getInnerUnwindDest(); + bool hasSelector = (dest != 0); + + // If we failed, just use the outer unwind dest, dropping the + // exception and selector on the floor. + if (!hasSelector) + dest = OuterUnwindDest; + + // Make a branch. + BranchInst::Create(dest, src); + + // Update the phis in the destination. They were inserted in an + // order which makes this work. + addIncomingPHIValuesForInto(src, dest); + + if (hasSelector) { + InnerExceptionPHI->addIncoming(call->getArgOperand(0), src); + InnerSelectorPHI->addIncoming(call->getArgOperand(1), src); + } + + return true; +} + +/// Get or create a target for the branch from ResumeInsts. +BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() { + // FIXME: New EH - rename this function when new EH is turned on. + if (InnerResumeDest) return InnerResumeDest; + + // Split the landing pad. + BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint; + InnerResumeDest = + OuterResumeDest->splitBasicBlock(SplitPoint, + OuterResumeDest->getName() + ".body"); + + // The number of incoming edges we expect to the inner landing pad. + const unsigned PHICapacity = 2; + + // Create corresponding new PHIs for all the PHIs in the outer landing pad. + BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *OuterPHI = cast<PHINode>(I); + PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, + OuterPHI->getName() + ".lpad-body", + InsertPoint); + OuterPHI->replaceAllUsesWith(InnerPHI); + InnerPHI->addIncoming(OuterPHI, OuterResumeDest); + } + + // Create a PHI for the exception values. + InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, + "eh.lpad-body", InsertPoint); + CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); + InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); + + // All done. 
+ return InnerResumeDest; +} + +/// forwardResume - Forward the 'resume' instruction to the caller's landing pad +/// block. When the landing pad block has only one predecessor, this is a simple +/// branch. When there is more than one predecessor, we need to split the +/// landing pad block after the landingpad instruction and jump to there. +void InvokeInliningInfo::forwardResume(ResumeInst *RI) { + BasicBlock *Dest = getInnerUnwindDestNewEH(); + BasicBlock *Src = RI->getParent(); + + BranchInst::Create(Dest, Src); + + // Update the PHIs in the destination. They were inserted in an order which + // makes this work. + addIncomingPHIValuesForInto(Src, Dest); + + InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); + RI->eraseFromParent(); +} + +/// [LIBUNWIND] Check whether this selector is "only cleanups": +/// call i32 @llvm.eh.selector(blah, blah, i32 0) +static bool isCleanupOnlySelector(EHSelectorInst *selector) { + if (selector->getNumArgOperands() != 3) return false; + ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2)); + return (val && val->isZero()); +} + +/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into +/// an invoke, we have to turn all of the calls that can throw into +/// invokes. This function analyzes BB to see if there are any calls, and if so, +/// it rewrites them to be invokes that unwind to Invoke's outer unwind dest and +/// fills in the PHI nodes in that block with the values recorded in Invoke. +/// +/// Returns true to indicate that the next block should be skipped. +static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, + InvokeInliningInfo &Invoke) { + LandingPadInst *LPI = Invoke.getLandingPadInst(); + + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { + Instruction *I = BBI++; + + if (LPI) // FIXME: New EH - This won't be NULL in the new EH. 
+ if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) { + unsigned NumClauses = LPI->getNumClauses(); + L->reserveClauses(NumClauses); + for (unsigned i = 0; i != NumClauses; ++i) + L->addClause(LPI->getClause(i)); + } + + // We only need to check for function calls: inlined invoke + // instructions require no special handling. + CallInst *CI = dyn_cast<CallInst>(I); + if (CI == 0) continue; + + // LIBUNWIND: merge selector instructions. + if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) { + EHSelectorInst *Outer = Invoke.getOuterSelector(); + if (!Outer) continue; + + bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner); + bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer); + + // If both selectors contain only cleanups, we don't need to do + // anything. TODO: this is really just a very specific instance + // of a much more general optimization. + if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue; + + // Otherwise, we just append the outer selector to the inner selector. + SmallVector<Value*, 16> NewSelector; + for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i) + NewSelector.push_back(Inner->getArgOperand(i)); + for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i) + NewSelector.push_back(Outer->getArgOperand(i)); + + CallInst *NewInner = + IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector); + // No need to copy attributes, calling convention, etc. + NewInner->takeName(Inner); + Inner->replaceAllUsesWith(NewInner); + Inner->eraseFromParent(); + continue; + } + + // If this call cannot unwind, don't convert it to an invoke. + if (CI->doesNotThrow()) + continue; + + // Convert this function call into an invoke instruction. + // First, split the basic block. 
+ BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); + + // Delete the unconditional branch inserted by splitBasicBlock + BB->getInstList().pop_back(); + + // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch + // directly to the new landing pad. + if (Invoke.forwardEHResume(CI, BB)) { + // TODO: 'Split' is now unreachable; clean it up. + + // We want to leave the original call intact so that the call + // graph and other structures won't get misled. We also have to + // avoid processing the next block, or we'll iterate here forever. + return true; + } + + // Otherwise, create the new invoke instruction. + ImmutableCallSite CS(CI); + SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), Split, + Invoke.getOuterUnwindDest(), + InvokeArgs, CI->getName(), BB); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + + // Make sure that anything using the call now uses the invoke! This also + // updates the CallGraph if present, because it uses a WeakVH. + CI->replaceAllUsesWith(II); + + Split->getInstList().pop_front(); // Delete the original call + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + Invoke.addIncomingPHIValuesFor(BB); + return false; + } + + return false; +} + + +/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes and turn unwind +/// instructions into branches to the invoke unwind dest. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlinedCodeInfo is information about the code that got inlined. 
+static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *InvokeDest = II->getUnwindDest(); + + Function *Caller = FirstNewBlock->getParent(); + + // The inlined code is currently at the end of the function, scan from the + // start of the inlined code to its end, checking for stuff we need to + // rewrite. If the code doesn't have calls or unwinds, we know there is + // nothing to rewrite. + if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) { + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); + return; + } + + InvokeInliningInfo Invoke(II); + + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ + if (InlinedCodeInfo.ContainsCalls) + if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { + // Honor a request to skip the next block. We don't need to + // consider UnwindInsts in this case either. + ++BB; + continue; + } + + if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + // An UnwindInst requires special handling when it gets inlined into an + // invoke site. Once this happens, we know that the unwind would cause + // a control transfer to the invoke exception destination, so we can + // transform it into a direct branch to the exception destination. + BranchInst::Create(InvokeDest, UI); + + // Delete the unwind instruction! + UI->eraseFromParent(); + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + Invoke.addIncomingPHIValuesFor(BB); + } + + if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + Invoke.forwardResume(RI); + } + } + + // Now that everything is happy, we have one final detail. 
The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); +} + +/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee +/// into the caller, update the specified callgraph to reflect the changes we +/// made. Note that it's possible that not all code was copied over, so only +/// some edges of the callgraph may remain. +static void UpdateCallGraphAfterInlining(CallSite CS, + Function::iterator FirstNewBlock, + ValueToValueMapTy &VMap, + InlineFunctionInfo &IFI) { + CallGraph &CG = *IFI.CG; + const Function *Caller = CS.getInstruction()->getParent()->getParent(); + const Function *Callee = CS.getCalledFunction(); + CallGraphNode *CalleeNode = CG[Callee]; + CallGraphNode *CallerNode = CG[Caller]; + + // Since we inlined some uninlined call sites in the callee into the caller, + // add edges from the caller to all of the callees of the callee. + CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); + + // Consider the case where CalleeNode == CallerNode. + CallGraphNode::CalledFunctionsVector CallCache; + if (CalleeNode == CallerNode) { + CallCache.assign(I, E); + I = CallCache.begin(); + E = CallCache.end(); + } + + for (; I != E; ++I) { + const Value *OrigCall = I->first; + + ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); + // Only copy the edge if the call was inlined! + if (VMI == VMap.end() || VMI->second == 0) + continue; + + // If the call was inlined, but then constant folded, there is no edge to + // add. Check for this case. + Instruction *NewCall = dyn_cast<Instruction>(VMI->second); + if (NewCall == 0) continue; + + // Remember that this call site got inlined for the client of + // InlineFunction. 
+ IFI.InlinedCalls.push_back(NewCall); + + // It's possible that inlining the callsite will cause it to go from an + // indirect to a direct call by resolving a function pointer. If this + // happens, set the callee of the new call site to a more precise + // destination. This can also happen if the call graph node of the caller + // was just unnecessarily imprecise. + if (I->second->getFunction() == 0) + if (Function *F = CallSite(NewCall).getCalledFunction()) { + // Indirect call site resolved to direct call. + CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); + + continue; + } + + CallerNode->addCalledFunction(CallSite(NewCall), I->second); + } + + // Update the call graph by deleting the edge from Caller to Callee. We must + // do this after the loop above in case Caller and Callee are the same. + CallerNode->removeCallEdgeFor(CS); +} + +/// HandleByValArgument - When inlining a call site that has a byval argument, +/// we have to make the implicit memcpy explicit by adding it. +static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, + const Function *CalledFunc, + InlineFunctionInfo &IFI, + unsigned ByValAlignment) { + Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); + + // If the called function is readonly, then it could not mutate the caller's + // copy of the byval'd memory. In this case, it is safe to elide the copy and + // temporary. + if (CalledFunc->onlyReadsMemory()) { + // If the byval argument has a specified alignment that is greater than the + // passed in pointer, then we either have to round up the input pointer or + // give up on this transformation. + if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. + return Arg; + + // If the pointer is already known to be sufficiently aligned, or if we can + // round it up to a larger alignment, then we don't need a temporary. 
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment, + IFI.TD) >= ByValAlignment) + return Arg; + + // Otherwise, we have to make a memcpy to get a safe alignment. This is bad + // for code quality, but rarely happens and is required for correctness. + } + + LLVMContext &Context = Arg->getContext(); + + Type *VoidPtrTy = Type::getInt8PtrTy(Context); + + // Create the alloca. If we have TargetData, use nice alignment. + unsigned Align = 1; + if (IFI.TD) + Align = IFI.TD->getPrefTypeAlignment(AggTy); + + // If the byval had an alignment specified, we *must* use at least that + // alignment, as it is required by the byval argument (and uses of the + // pointer inside the callee). + Align = std::max(Align, ByValAlignment); + + Function *Caller = TheCall->getParent()->getParent(); + + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), + &*Caller->begin()->begin()); + // Emit a memcpy. + Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; + Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), + Intrinsic::memcpy, + Tys); + Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); + Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); + + Value *Size; + if (IFI.TD == 0) + Size = ConstantExpr::getSizeOf(AggTy); + else + Size = ConstantInt::get(Type::getInt64Ty(Context), + IFI.TD->getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Value *CallArgs[] = { + DestCast, SrcCast, Size, + ConstantInt::get(Type::getInt32Ty(Context), 1), + ConstantInt::getFalse(Context) // isVolatile + }; + IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs); + + // Uses of the argument in the function should use our new alloca + // instead. + return NewAlloca; +} + +// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime +// intrinsic. 
+static bool isUsedByLifetimeMarker(Value *V) { + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; + ++UI) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + } + return false; +} + +// hasLifetimeMarkers - Check whether the given alloca already has +// lifetime.start or lifetime.end intrinsics. +static bool hasLifetimeMarkers(AllocaInst *AI) { + Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); + if (AI->getType() == Int8PtrTy) + return isUsedByLifetimeMarker(AI); + + // Do a scan to find all the casts to i8*. + for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E; + ++I) { + if (I->getType() != Int8PtrTy) continue; + if (I->stripPointerCasts() != AI) continue; + if (isUsedByLifetimeMarker(*I)) + return true; + } + return false; +} + +/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively +/// update InlinedAtEntry of a DebugLoc. +static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, + const DebugLoc &InlinedAtDL, + LLVMContext &Ctx) { + if (MDNode *IA = DL.getInlinedAt(Ctx)) { + DebugLoc NewInlinedAtDL + = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx); + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), + NewInlinedAtDL.getAsMDNode(Ctx)); + } + + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), + InlinedAtDL.getAsMDNode(Ctx)); +} + + +/// fixupLineNumbers - Update inlined instructions' line numbers to +/// to encode location where these instructions are inlined. 
+static void fixupLineNumbers(Function *Fn, Function::iterator FI, + Instruction *TheCall) { + DebugLoc TheCallDL = TheCall->getDebugLoc(); + if (TheCallDL.isUnknown()) + return; + + for (; FI != Fn->end(); ++FI) { + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + DebugLoc DL = BI->getDebugLoc(); + if (!DL.isUnknown()) { + BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { + LLVMContext &Ctx = BI->getContext(); + MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); + DVI->setOperand(2, createInlinedVariable(DVI->getVariable(), + InlinedAt, Ctx)); + } + } + } + } +} + +// InlineFunction - This function inlines the called function into the basic +// block of the caller. This returns false if it is not possible to inline this +// call. The program is still in a well defined state if this occurs though. +// +// Note that this only does one level of inlining. For example, if the +// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +// exists in the instruction stream. Similarly this will inline a recursive +// function by one level. +// +bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { + Instruction *TheCall = CS.getInstruction(); + LLVMContext &Context = TheCall->getContext(); + assert(TheCall->getParent() && TheCall->getParent()->getParent() && + "Instruction not in function!"); + + // If IFI has any state in it, zap it before we fill it in. + IFI.reset(); + + const Function *CalledFunc = CS.getCalledFunction(); + if (CalledFunc == 0 || // Can't inline external function or indirect + CalledFunc->isDeclaration() || // call, or call to a vararg function! + CalledFunc->getFunctionType()->isVarArg()) return false; + + // If the call to the callee is not a tail call, we must clear the 'tail' + // flags on any calls that we inline. 
+ bool MustClearTailCallFlags = + !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); + + // If the call to the callee cannot throw, set the 'nounwind' flag on any + // calls that we inline. + bool MarkNoUnwind = CS.doesNotThrow(); + + BasicBlock *OrigBB = TheCall->getParent(); + Function *Caller = OrigBB->getParent(); + + // GC poses two hazards to inlining, which only occur when the callee has GC: + // 1. If the caller has no GC, then the callee's GC must be propagated to the + // caller. + // 2. If the caller has a differing GC, it is invalid to inline. + if (CalledFunc->hasGC()) { + if (!Caller->hasGC()) + Caller->setGC(CalledFunc->getGC()); + else if (CalledFunc->getGC() != Caller->getGC()) + return false; + } + + // Find the personality function used by the landing pads of the caller. If it + // exists, then check to see that it matches the personality function used in + // the callee. + for (Function::const_iterator + I = Caller->begin(), E = Caller->end(); I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + // FIXME: This 'isa' here should become go away once the new EH system is + // in place. + if (!isa<LandingPadInst>(BB->getFirstNonPHI())) + continue; + const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI()); + const Value *CallerPersFn = LP->getPersonalityFn(); + + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + for (Function::const_iterator + I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once + // the new EH system is in place. 
+ if (const LandingPadInst *LP = + dyn_cast<LandingPadInst>(BB->getFirstNonPHI())) + if (CallerPersFn != LP->getPersonalityFn()) + return false; + break; + } + + break; + } + + // Get an iterator to the last basic block in the function, which will have + // the new function inlined after it. + // + Function::iterator LastBlock = &Caller->back(); + + // Make sure to capture all of the return instructions from the cloned + // function. + SmallVector<ReturnInst*, 8> Returns; + ClonedCodeInfo InlinedFunctionInfo; + Function::iterator FirstNewBlock; + + { // Scope to destroy VMap after cloning. + ValueToValueMapTy VMap; + + assert(CalledFunc->arg_size() == CS.arg_size() && + "No varargs calls can be inlined!"); + + // Calculate the vector of arguments to pass into the function cloner, which + // matches up the formal to the actual argument values. + CallSite::arg_iterator AI = CS.arg_begin(); + unsigned ArgNo = 0; + for (Function::const_arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { + Value *ActualArg = *AI; + + // When byval arguments actually inlined, we need to make the copy implied + // by them explicit. However, we don't do this if the callee is readonly + // or readnone, because the copy would be unneeded: the callee doesn't + // modify the struct. + if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) { + ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, + CalledFunc->getParamAlignment(ArgNo+1)); + + // Calls that we inline may use the new alloca, so we need to clear + // their 'tail' flags if HandleByValArgument introduced a new alloca and + // the callee has calls. + MustClearTailCallFlags |= ActualArg != *AI; + } + + VMap[I] = ActualArg; + } + + // We want the inliner to prune the code as it copies. 
We would LOVE to + // have no dead or constant instructions leftover after inlining occurs + // (which can happen, e.g., because an argument was constant), but we'll be + // happy with whatever the cloner can do. + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + /*ModuleLevelChanges=*/false, Returns, ".i", + &InlinedFunctionInfo, IFI.TD, TheCall); + + // Remember the first block that is newly cloned over. + FirstNewBlock = LastBlock; ++FirstNewBlock; + + // Update the callgraph if requested. + if (IFI.CG) + UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); + + // Update inlined instructions' line number information. + fixupLineNumbers(Caller, FirstNewBlock, TheCall); + } + + // If there are any alloca instructions in the block that used to be the entry + // block for the callee, move them to the entry block of the caller. First + // calculate which instruction they should be inserted before. We insert the + // instructions at the end of the current alloca list. + // + { + BasicBlock::iterator InsertPoint = Caller->begin()->begin(); + for (BasicBlock::iterator I = FirstNewBlock->begin(), + E = FirstNewBlock->end(); I != E; ) { + AllocaInst *AI = dyn_cast<AllocaInst>(I++); + if (AI == 0) continue; + + // If the alloca is now dead, remove it. This often occurs due to code + // specialization. + if (AI->use_empty()) { + AI->eraseFromParent(); + continue; + } + + if (!isa<Constant>(AI->getArraySize())) + continue; + + // Keep track of the static allocas that we inline into the caller. + IFI.StaticAllocas.push_back(AI); + + // Scan for the block of allocas that we can move over, and move them + // all at once. + while (isa<AllocaInst>(I) && + isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { + IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); + ++I; + } + + // Transfer all of the allocas over in a block. Using splice means + // that the instructions aren't removed from the symbol table, then + // reinserted. 
+ Caller->getEntryBlock().getInstList().splice(InsertPoint, + FirstNewBlock->getInstList(), + AI, I); + } + } + + // Leave lifetime markers for the static alloca's, scoping them to the + // function we just inlined. + if (!IFI.StaticAllocas.empty()) { + IRBuilder<> builder(FirstNewBlock->begin()); + for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { + AllocaInst *AI = IFI.StaticAllocas[ai]; + + // If the alloca is already scoped to something smaller than the whole + // function then there's no need to add redundant, less accurate markers. + if (hasLifetimeMarkers(AI)) + continue; + + builder.CreateLifetimeStart(AI); + for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { + IRBuilder<> builder(Returns[ri]); + builder.CreateLifetimeEnd(AI); + } + } + } + + // If the inlined code contained dynamic alloca instructions, wrap the inlined + // code with llvm.stacksave/llvm.stackrestore intrinsics. + if (InlinedFunctionInfo.ContainsDynamicAllocas) { + Module *M = Caller->getParent(); + // Get the two intrinsics we care about. + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); + + // Insert the llvm.stacksave. + CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) + .CreateCall(StackSave, "savedstack"); + + // Insert a call to llvm.stackrestore before any return instructions in the + // inlined function. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); + } + + // Count the number of StackRestore calls we insert. + unsigned NumStackRestores = Returns.size(); + + // If we are inlining an invoke instruction, insert restores before each + // unwind. These unwinds will be rewritten into branches later. 
+ if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) { + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) + if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr); + ++NumStackRestores; + } + } + } + + // If we are inlining tail call instruction through a call site that isn't + // marked 'tail', we must remove the tail marker for any calls in the inlined + // code. Also, calls inlined through a 'nounwind' call site should be marked + // 'nounwind'. + if (InlinedFunctionInfo.ContainsCalls && + (MustClearTailCallFlags || MarkNoUnwind)) { + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (MustClearTailCallFlags) + CI->setTailCall(false); + if (MarkNoUnwind) + CI->setDoesNotThrow(); + } + } + + // If we are inlining through a 'nounwind' call site then any inlined 'unwind' + // instructions are unreachable. + if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind) + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) { + TerminatorInst *Term = BB->getTerminator(); + if (isa<UnwindInst>(Term)) { + new UnreachableInst(Context, Term); + BB->getInstList().erase(Term); + } + } + + // If we are inlining for an invoke instruction, we must make sure to rewrite + // any inlined 'unwind' instructions into branches to the invoke exception + // destination, and call instructions into invoke instructions. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) + HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); + + // If we cloned in _exactly one_ basic block, and if that block ends in a + // return instruction, we splice the body of the inlined callee directly into + // the calling basic block. 
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { + // Move all of the instructions right before the call. + OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), + FirstNewBlock->begin(), FirstNewBlock->end()); + // Remove the cloned basic block. + Caller->getBasicBlockList().pop_back(); + + // If the call site was an invoke instruction, add a branch to the normal + // destination. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) + BranchInst::Create(II->getNormalDest(), TheCall); + + // If the return instruction returned a value, replace uses of the call with + // uses of the returned value. + if (!TheCall->use_empty()) { + ReturnInst *R = Returns[0]; + if (TheCall == R->getReturnValue()) + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + else + TheCall->replaceAllUsesWith(R->getReturnValue()); + } + // Since we are now done with the Call/Invoke, we can delete it. + TheCall->eraseFromParent(); + + // Since we are now done with the return instruction, delete it also. + Returns[0]->eraseFromParent(); + + // We are now done with the inlining. + return true; + } + + // Otherwise, we have the normal case, of more than one block to inline or + // multiple return sites. + + // We want to clone the entire callee function into the hole between the + // "starter" and "ender" blocks. How we accomplish this depends on whether + // this is an invoke instruction or a call instruction. + BasicBlock *AfterCallBB; + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + + // Add an unconditional branch to make this look like the CallInst case... + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + + // Split the basic block. This guarantees that no PHI nodes will have to be + // updated due to new incoming edges, and make the invoke case more + // symmetric to the call case. 
+ AfterCallBB = OrigBB->splitBasicBlock(NewBr, + CalledFunc->getName()+".exit"); + + } else { // It's a call + // If this is a call instruction, we need to split the basic block that + // the call lives in. + // + AfterCallBB = OrigBB->splitBasicBlock(TheCall, + CalledFunc->getName()+".exit"); + } + + // Change the branch that used to go to AfterCallBB to branch to the first + // basic block of the inlined function. + // + TerminatorInst *Br = OrigBB->getTerminator(); + assert(Br && Br->getOpcode() == Instruction::Br && + "splitBasicBlock broken!"); + Br->setOperand(0, FirstNewBlock); + + + // Now that the function is correct, make it a little bit nicer. In + // particular, move the basic blocks inserted from the end of the function + // into the space made by splitting the source basic block. + Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), + FirstNewBlock, Caller->end()); + + // Handle all of the return instructions that we just cloned in, and eliminate + // any users of the original call/invoke instruction. + Type *RTy = CalledFunc->getReturnType(); + + PHINode *PHI = 0; + if (Returns.size() > 1) { + // The PHI node should go at the front of the new basic block to merge all + // possible incoming values. + if (!TheCall->use_empty()) { + PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), + AfterCallBB->begin()); + // Anything that used the result of the function call should now use the + // PHI node as their operand. + TheCall->replaceAllUsesWith(PHI); + } + + // Loop over all of the return instructions adding entries to the PHI node + // as appropriate. + if (PHI) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + assert(RI->getReturnValue()->getType() == PHI->getType() && + "Ret value not consistent in function!"); + PHI->addIncoming(RI->getReturnValue(), RI->getParent()); + } + } + + + // Add a branch to the merge points and remove return instructions. 
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + BranchInst::Create(AfterCallBB, RI); + RI->eraseFromParent(); + } + } else if (!Returns.empty()) { + // Otherwise, if there is exactly one return value, just replace anything + // using the return value of the call with the computed value. + if (!TheCall->use_empty()) { + if (TheCall == Returns[0]->getReturnValue()) + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + else + TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); + } + + // Update PHI nodes that use the ReturnBB to use the AfterCallBB. + BasicBlock *ReturnBB = Returns[0]->getParent(); + ReturnBB->replaceAllUsesWith(AfterCallBB); + + // Splice the code from the return block into the block that it will return + // to, which contains the code that was after the call. + AfterCallBB->getInstList().splice(AfterCallBB->begin(), + ReturnBB->getInstList()); + + // Delete the return instruction now and empty ReturnBB now. + Returns[0]->eraseFromParent(); + ReturnBB->eraseFromParent(); + } else if (!TheCall->use_empty()) { + // No returns, but something is using the return value of the call. Just + // nuke the result. + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + } + + // Since we are now done with the Call/Invoke, we can delete it. + TheCall->eraseFromParent(); + + // We should always be able to fold the entry block of the function into the + // single predecessor of the block... + assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); + BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); + + // Splice the code entry block into calling block, right before the + // unconditional branch. + CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes + OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); + + // Remove the unconditional branch. 
+ OrigBB->getInstList().erase(Br); + + // Now we can remove the CalleeEntry block, which is now empty. + Caller->getBasicBlockList().erase(CalleeEntry); + + // If we inserted a phi node, check to see if it has a single value (e.g. all + // the entries are the same or undef). If so, remove the PHI so it doesn't + // block other optimizations. + if (PHI) + if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { + PHI->replaceAllUsesWith(V); + PHI->eraseFromParent(); + } + + return true; +}
diff --git a/src/LLVM/lib/Transforms/Utils/InstructionNamer.cpp b/src/LLVM/lib/Transforms/Utils/InstructionNamer.cpp new file mode 100644 index 0000000..45c15de --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,64 @@ +//===- InstructionNamer.cpp - Give anonymous instructions names -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a little utility pass that gives instructions names, this is mostly +// useful when diffing the effect of an optimization because deleting an +// unnamed instruction can change all other instruction numbering, making the +// diff very noisy. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +using namespace llvm; + +namespace { + struct InstNamer : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstNamer() : FunctionPass(ID) { + initializeInstNamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &Info) const { + Info.setPreservesAll(); + } + + bool runOnFunction(Function &F) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); + AI != AE; ++AI) + if (!AI->hasName() && !AI->getType()->isVoidTy()) + AI->setName("arg"); + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (!BB->hasName()) + BB->setName("bb"); + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->hasName() && !I->getType()->isVoidTy()) + I->setName("tmp"); + } + return true; + } + }; + + char InstNamer::ID = 0; +} + +INITIALIZE_PASS(InstNamer, "instnamer", + "Assign names to anonymous instructions", false, false) +char &llvm::InstructionNamerID = InstNamer::ID; +//===----------------------------------------------------------------------===// +// +// InstructionNamer - Give any unnamed non-void instructions "tmp" names. 
+// +FunctionPass *llvm::createInstructionNamerPass() { + return new InstNamer(); +}
diff --git a/src/LLVM/lib/Transforms/Utils/LCSSA.cpp b/src/LLVM/lib/Transforms/Utils/LCSSA.cpp index 487fe64..b654111 100644 --- a/src/LLVM/lib/Transforms/Utils/LCSSA.cpp +++ b/src/LLVM/lib/Transforms/Utils/LCSSA.cpp
@@ -47,7 +47,9 @@ namespace { struct LCSSA : public LoopPass { static char ID; // Pass identification, replacement for typeid - LCSSA() : LoopPass(ID) {} + LCSSA() : LoopPass(ID) { + initializeLCSSAPass(*PassRegistry::getPassRegistry()); + } // Cached analysis information for the current function. DominatorTree *DT; @@ -65,10 +67,7 @@ AU.setPreservesCFG(); AU.addRequired<DominatorTree>(); - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); AU.addPreservedID(LoopSimplifyID); AU.addPreserved<ScalarEvolution>(); } @@ -90,7 +89,10 @@ } char LCSSA::ID = 0; -static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass"); +INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) Pass *llvm::createLCSSAPass() { return new LCSSA(); } char &llvm::LCSSAID = LCSSA::ID; @@ -200,11 +202,15 @@ // the value, so adjust DomBB to the normal destination block, which is // effectively where the value is first usable. BasicBlock *DomBB = Inst->getParent(); + if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst)) + DomBB = Inv->getNormalDest(); DomTreeNode *DomNode = DT->getNode(DomBB); + SmallVector<PHINode*, 16> AddedPHIs; + SSAUpdater SSAUpdate; - SSAUpdate.Initialize(Inst); + SSAUpdate.Initialize(Inst->getType(), Inst->getName()); // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. @@ -217,8 +223,9 @@ if (SSAUpdate.HasValueForBlock(ExitBB)) continue; PHINode *PN = PHINode::Create(Inst->getType(), + PredCache.GetNumPreds(ExitBB), + Inst->getName()+".lcssa", ExitBB->begin()); - PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB)); // Add inputs from inside the loop for this PHI. 
for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { @@ -232,6 +239,8 @@ &PN->getOperandUse( PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); } + + AddedPHIs.push_back(PN); // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); @@ -258,6 +267,12 @@ // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UsesToRewrite[i]); } + + // Remove PHI nodes that did not have any uses rewritten. + for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { + if (AddedPHIs[i]->use_empty()) + AddedPHIs[i]->eraseFromParent(); + } return true; }
diff --git a/src/LLVM/lib/Transforms/Utils/LLVMTransformUtils.vcxproj b/src/LLVM/lib/Transforms/Utils/LLVMTransformUtils.vcxproj new file mode 100644 index 0000000..616b915 --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/LLVMTransformUtils.vcxproj
@@ -0,0 +1,373 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Debug|x64"> + <Configuration>Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|Win32"> + <Configuration>Profile</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Profile|x64"> + <Configuration>Profile</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|x64"> + <Configuration>Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{5C514254-58EE-4850-8743-F5D7BEAA3E66}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>LLVMTransformUtils</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + 
<ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Profile|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>NotSet</CharacterSet> + <WholeProgramOptimization>true</WholeProgramOptimization> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Platform)\$(Configuration)\</IntDir> + 
<TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.lib</TargetExt> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</OutDir> + <OutDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</OutDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Platform)\$(Configuration)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(Platform)\$(Configuration)\</IntDir> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(ProjectName)</TargetName> + <TargetName Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">$(ProjectName)</TargetName> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.lib</TargetExt> + <TargetExt Condition="'$(Configuration)|$(Platform)'=='Profile|x64'">.lib</TargetExt> + 
</PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Debug/LLVMTransformUtils.pdb</ProgramDataBaseFileName> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + 
<AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks> + <CompileAs>CompileAsCpp</CompileAs> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling> + </ExceptionHandling> + <InlineFunctionExpansion>Disabled</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Disabled</Optimization> + <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Debug";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Debug</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + 
<ProgramDataBaseFileName>..\..\Debug/LLVMTransformUtils.pdb</ProgramDataBaseFileName> + <OmitFramePointers>false</OmitFramePointers> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Debug\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMTransformUtils.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Profile|Win32'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat>ProgramDatabase</DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMTransformUtils.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMTransformUtils.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>true</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemDefinitionGroup 
Condition="'$(Configuration)|$(Platform)'=='Profile|x64'"> + <ClCompile> + <AdditionalOptions> /Zm1000 /EHs-c- -w14062 %(AdditionalOptions)</AdditionalOptions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <CompileAs>CompileAsCpp</CompileAs> + <DisableSpecificWarnings>4146;4180;4224;4244;4267;4275;4291;4345;4351;4355;4503;4551;4624;4715;4800;4065;4181</DisableSpecificWarnings> + <ExceptionHandling>false</ExceptionHandling> + <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion> + <MultiProcessorCompilation>true</MultiProcessorCompilation> + <Optimization>Full</Optimization> + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> + <RuntimeTypeInfo>false</RuntimeTypeInfo> + <WarningLevel>Level3</WarningLevel> + <DebugInformationFormat> + </DebugInformationFormat> + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR="Release";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AssemblerListingLocation>Release</AssemblerListingLocation> + <ObjectFileName>$(IntDir)</ObjectFileName> + <ProgramDataBaseFileName>..\..\Release/LLVMTransformUtils.pdb</ProgramDataBaseFileName> + <FavorSizeOrSpeed>Size</FavorSizeOrSpeed> + <OmitFramePointers>false</OmitFramePointers> + <WholeProgramOptimization>true</WholeProgramOptimization> + <StringPooling>true</StringPooling> + <BufferSecurityCheck>false</BufferSecurityCheck> + <FunctionLevelLinking>false</FunctionLevelLinking> + <FloatingPointExceptions>false</FloatingPointExceptions> + <CreateHotpatchableImage>false</CreateHotpatchableImage> + </ClCompile> + <ResourceCompile> + 
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NONSTDC_NO_DEPRECATE;_CRT_NONSTDC_NO_WARNINGS;_SCL_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;__STDC_CONSTANT_MACROS;__STDC_FORMAT_MACROS;__STDC_LIMIT_MACROS;_HAS_EXCEPTIONS=0;CMAKE_INTDIR=\"Release\";%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + </ResourceCompile> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + </ItemDefinitionGroup> + <ItemGroup> + <ClCompile Include="AddrModeMatcher.cpp" /> + <ClCompile Include="BasicBlockUtils.cpp" /> + <ClCompile Include="BreakCriticalEdges.cpp" /> + <ClCompile Include="BuildLibCalls.cpp" /> + <ClCompile Include="DemoteRegToStack.cpp" /> + <ClCompile Include="InstructionNamer.cpp" /> + <ClCompile Include="LCSSA.cpp" /> + <ClCompile Include="Local.cpp" /> + <ClCompile Include="LoopSimplify.cpp" /> + <ClCompile Include="LowerInvoke.cpp" /> + <ClCompile Include="LowerSwitch.cpp" /> + <ClCompile Include="PromoteMemoryToRegister.cpp" /> + <ClCompile Include="SSAUpdater.cpp" /> + <ClCompile Include="SimplifyCFG.cpp" /> + <ClCompile Include="UnifyFunctionExitNodes.cpp" /> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\Analysis/LLVMAnalysis.vcxproj"> + <Project>97EDF19C-6360-4770-9255-EBA2F1A13E9B</Project> + </ProjectReference> + <ProjectReference Include="..\..\VMCore/LLVMCore.vcxproj"> + <Project>00F3295C-F7A0-43D3-BD0B-1BC0515B30E1</Project> + </ProjectReference> + <ProjectReference 
Include="..\..\Support/LLVMSupport.vcxproj"> + <Project>C688DD59-C6CB-4B33-B56F-A7D6F3761524</Project> + </ProjectReference> + <ProjectReference Include="..\..\Target/LLVMTarget.vcxproj"> + <Project>76F7B8C2-C825-40DC-BB68-9D987275E320</Project> + </ProjectReference> + <ProjectReference Include="..\..\..\include/llvm/intrinsics_gen.vcxproj"> + <Project>E9B87B46-1EB0-4D95-9049-41B148FBADCD</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Utils/LLVMTransformUtils.vcxproj.filters b/src/LLVM/lib/Transforms/Utils/LLVMTransformUtils.vcxproj.filters new file mode 100644 index 0000000..f829786 --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/LLVMTransformUtils.vcxproj.filters
@@ -0,0 +1,63 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <ClCompile Include="AddrModeMatcher.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="BasicBlockUtils.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="BreakCriticalEdges.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="BuildLibCalls.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="DemoteRegToStack.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="InstructionNamer.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="LCSSA.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="Local.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="LoopSimplify.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="LowerInvoke.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="LowerSwitch.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="PromoteMemoryToRegister.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="SSAUpdater.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="SimplifyCFG.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="UnifyFunctionExitNodes.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{1733179C-6FE4-462E-9EA5-4A29A1ACFE25}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Utils/Local.cpp b/src/LLVM/lib/Transforms/Utils/Local.cpp index a6f4fc7..7034feb 100644 --- a/src/LLVM/lib/Transforms/Utils/Local.cpp +++ b/src/LLVM/lib/Transforms/Utils/Local.cpp
@@ -20,15 +20,21 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" +#include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" @@ -38,12 +44,16 @@ // Local constant propagation. // -// ConstantFoldTerminator - If a terminator instruction is predicated on a -// constant value, convert it into an unconditional branch to the constant -// destination. -// -bool llvm::ConstantFoldTerminator(BasicBlock *BB) { +/// ConstantFoldTerminator - If a terminator instruction is predicated on a +/// constant value, convert it into an unconditional branch to the constant +/// destination. This is a nontrivial operation because the successors of this +/// basic block must have their PHI nodes updated. +/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch +/// conditions and indirectbr addresses this might make dead if +/// DeleteDeadConditions is true. +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { TerminatorInst *T = BB->getTerminator(); + IRBuilder<> Builder(T); // Branch - See if we are conditional jumping on constant if (BranchInst *BI = dyn_cast<BranchInst>(T)) { @@ -63,12 +73,11 @@ // Let the basic block know that we are letting go of it. Based on this, // it will adjust it's PHI nodes. 
- assert(BI->getParent() && "Terminator not inserted in block!"); - OldDest->removePredecessor(BI->getParent()); + OldDest->removePredecessor(BB); - // Set the unconditional destination, and change the insn to be an - // unconditional branch. - BI->setUnconditionalDest(Destination); + // Replace the conditional branch with an unconditional one. + Builder.CreateBr(Destination); + BI->eraseFromParent(); return true; } @@ -81,8 +90,12 @@ assert(BI->getParent() && "Terminator not inserted in block!"); Dest1->removePredecessor(BI->getParent()); - // Change a conditional branch to unconditional. - BI->setUnconditionalDest(Dest1); + // Replace the conditional branch with an unconditional one. + Builder.CreateBr(Dest1); + Value *Cond = BI->getCondition(); + BI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond); return true; } return false; @@ -131,7 +144,7 @@ // now. if (TheOnlyDest) { // Insert the new branch. - BranchInst::Create(TheOnlyDest, SI); + Builder.CreateBr(TheOnlyDest); BasicBlock *BB = SI->getParent(); // Remove entries from PHI nodes which we no longer branch to... @@ -145,17 +158,21 @@ } // Delete the old switch. - BB->getInstList().erase(SI); + Value *Cond = SI->getCondition(); + SI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Cond); return true; } if (SI->getNumSuccessors() == 2) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. - Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(), - SI->getSuccessorValue(1)); + Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), + SI->getSuccessorValue(1), "cond"); + // Insert the new branch. - BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI); + Builder.CreateCondBr(Cond, SI->getSuccessor(1), SI->getSuccessor(0)); // Delete the old switch. 
SI->eraseFromParent(); @@ -170,7 +187,7 @@ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { BasicBlock *TheOnlyDest = BA->getBasicBlock(); // Insert the new branch. - BranchInst::Create(TheOnlyDest, IBI); + Builder.CreateBr(TheOnlyDest); for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { if (IBI->getDestination(i) == TheOnlyDest) @@ -178,7 +195,10 @@ else IBI->getDestination(i)->removePredecessor(IBI->getParent()); } + Value *Address = IBI->getAddress(); IBI->eraseFromParent(); + if (DeleteDeadConditions) + RecursivelyDeleteTriviallyDeadInstructions(Address); // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an @@ -206,20 +226,37 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; - // We don't want debug info removed by anything this general. - if (ISA_DEBUG_INFO_INTRINSIC(I)) return false; + // We don't want the landingpad instruction removed by anything this general. + if (isa<LandingPadInst>(I)) + return false; - // Likewise for memory use markers. - if (isa<MemoryUseIntrinsic>(I)) return false; + // We don't want debug info removed by anything this general, unless + // debug info is empty. + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) { + if (DDI->getAddress()) + return false; + return true; + } + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { + if (DVI->getValue()) + return false; + return true; + } if (!I->mayHaveSideEffects()) return true; // Special case intrinsics that "may have side effects" but can be deleted // when dead. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { // Safe to delete llvm.stacksave if dead. if (II->getIntrinsicID() == Intrinsic::stacksave) return true; + + // Lifetime intrinsics are dead when their right-hand is undef. 
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + return isa<UndefValue>(II->getArgOperand(1)); + } return false; } @@ -260,36 +297,46 @@ return true; } +/// areAllUsesEqual - Check whether the uses of a value are all the same. +/// This is similar to Instruction::hasOneUse() except this will also return +/// true when there are no uses or multiple uses that all refer to the same +/// value. +static bool areAllUsesEqual(Instruction *I) { + Value::use_iterator UI = I->use_begin(); + Value::use_iterator UE = I->use_end(); + if (UI == UE) + return true; + + User *TheUse = *UI; + for (++UI; UI != UE; ++UI) { + if (*UI != TheUse) + return false; + } + return true; +} + /// RecursivelyDeleteDeadPHINode - If the specified value is an effectively /// dead PHI node, due to being a def-use chain of single-use nodes that /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them -/// too, recursively. Return true if the PHI node is actually deleted. -bool -llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { - // We can remove a PHI if it is on a cycle in the def-use graph - // where each node in the cycle has degree one, i.e. only one use, - // and is an instruction with no side effects. - if (!PN->hasOneUse()) - return false; +/// too, recursively. Return true if a change was made. 
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { + SmallPtrSet<Instruction*, 4> Visited; + for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); + I = cast<Instruction>(*I->use_begin())) { + if (I->use_empty()) + return RecursivelyDeleteTriviallyDeadInstructions(I); - bool Changed = false; - SmallPtrSet<PHINode *, 4> PHIs; - PHIs.insert(PN); - for (Instruction *J = cast<Instruction>(*PN->use_begin()); - J->hasOneUse() && !J->mayHaveSideEffects(); - J = cast<Instruction>(*J->use_begin())) - // If we find a PHI more than once, we're on a cycle that + // If we find an instruction more than once, we're on a cycle that // won't prove fruitful. - if (PHINode *JP = dyn_cast<PHINode>(J)) - if (!PHIs.insert(cast<PHINode>(JP))) { - // Break the cycle and delete the PHI and its operands. - JP->replaceAllUsesWith(UndefValue::get(JP->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(JP); - Changed = true; - break; - } - return Changed; + if (!Visited.insert(I)) { + // Break the cycle and delete the instruction and its operands. + I->replaceAllUsesWith(UndefValue::get(I->getType())); + (void)RecursivelyDeleteTriviallyDeadInstructions(I); + return true; + } + } + return false; } /// SimplifyInstructionsInBlock - Scan the specified basic block and try to @@ -310,8 +357,14 @@ BI = BB->begin(); continue; } - + + if (Inst->isTerminator()) + break; + + WeakVH BIHandle(BI); MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + if (BIHandle != BI) + BI = BB->begin(); } return MadeChange; } @@ -346,13 +399,13 @@ WeakVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); - - Value *PNV = PN->hasConstantValue(); + + Value *PNV = SimplifyInstruction(PN, TD); if (PNV == 0) continue; - + // If we're able to simplify the phi to a single value, substitute the new // value into all of its uses. 
- assert(PNV != PN && "hasConstantValue broken"); + assert(PNV != PN && "SimplifyInstruction broken!"); Value *OldPhiIt = PhiIt; ReplaceAndSimplifyAllUses(PN, PNV, TD); @@ -383,10 +436,6 @@ BasicBlock *PredBB = DestBB->getSinglePredecessor(); assert(PredBB && "Block doesn't have a single predecessor!"); - // Splice all the instructions from PredBB to DestBB. - PredBB->getTerminator()->eraseFromParent(); - DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); - // Zap anything that took the address of DestBB. Not doing this will give the // address an invalid value. if (DestBB->hasAddressTaken()) { @@ -401,7 +450,17 @@ // Anything that branched to PredBB now branches to DestBB. PredBB->replaceAllUsesWith(DestBB); + // Splice all the instructions from PredBB to DestBB. + PredBB->getTerminator()->eraseFromParent(); + DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + if (P) { + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + if (DT) { + BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock(); + DT->changeImmediateDominator(DestBB, PredBBIDom); + DT->eraseNode(PredBB); + } ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); if (PI) { PI->replaceAllUses(PredBB, DestBB); @@ -486,10 +545,13 @@ /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an /// unconditional branch, and contains no instructions other than PHI nodes, -/// potential debug intrinsics and the branch. If possible, eliminate BB by -/// rewriting all the predecessors to branch to the successor block and return -/// true. If we can't transform, return false. +/// potential side-effect free intrinsics and the branch. If possible, +/// eliminate BB by rewriting all the predecessors to branch to the successor +/// block and return true. If we can't transform, return false. 
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { + assert(BB != &BB->getParent()->getEntryBlock() && + "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); + // We can't eliminate infinite loops. BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); if (BB == Succ) return false; @@ -560,13 +622,15 @@ } } - while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { - if (Succ->getSinglePredecessor()) { - // BB is the only predecessor of Succ, so Succ will end up with exactly - // the same predecessors BB had. - Succ->getInstList().splice(Succ->begin(), - BB->getInstList(), BB->begin()); - } else { + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + + // Copy over any phi, debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList()); + } else { + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. assert(PN->use_empty() && "There shouldn't be any uses here!"); PN->eraseFromParent(); @@ -589,7 +653,7 @@ bool Changed = false; // This implementation doesn't currently consider undef operands - // specially. Theroetically, two phis which are identical except for + // specially. Theoretically, two phis which are identical except for // one having an undef where the other doesn't could be collapsed. // Map from PHI hash values to PHI nodes. If multiple PHIs have @@ -607,12 +671,19 @@ // them, which helps expose duplicates, but we have to check all the // operands to be safe in case instcombine hasn't run. uintptr_t Hash = 0; + // This hash algorithm is quite weak as hash functions go, but it seems + // to do a good enough job for this particular purpose, and is very quick. 
for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) { - // This hash algorithm is quite weak as hash functions go, but it seems - // to do a good enough job for this particular purpose, and is very quick. Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I)); Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); } + for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end(); + I != E; ++I) { + Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I)); + Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); + } + // Avoid colliding with the DenseMap sentinels ~0 and ~0-1. + Hash >>= 1; // If we've never seen this hash value before, it's a unique PHI. std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair = HashMap.insert(std::make_pair(Hash, PN)); @@ -642,3 +713,176 @@ return Changed; } + +/// enforceKnownAlignment - If the specified pointer points to an object that +/// we control, modify the object's alignment to PrefAlign. This isn't +/// often possible though. If alignment is important, a more reliable approach +/// is to simply align all global variables and allocation instructions to +/// their preferred alignment from the beginning. +/// +static unsigned enforceKnownAlignment(Value *V, unsigned Align, + unsigned PrefAlign, const TargetData *TD) { + V = V->stripPointerCasts(); + + if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + // If the preferred alignment is greater than the natural stack alignment + // then don't round up. This avoids dynamic stack realignment. + if (TD && TD->exceedsNaturalStackAlignment(PrefAlign)) + return Align; + // If there is a requested alignment and if this is an alloca, round up. 
+ if (AI->getAlignment() >= PrefAlign) + return AI->getAlignment(); + AI->setAlignment(PrefAlign); + return PrefAlign; + } + + if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + // If there is a large requested alignment and we can, bump up the alignment + // of the global. + if (GV->isDeclaration()) return Align; + + if (GV->getAlignment() >= PrefAlign) + return GV->getAlignment(); + // We can only increase the alignment of the global if it has no alignment + // specified or if it is not assigned a section. If it is assigned a + // section, the global could be densely packed with other objects in the + // section, increasing the alignment could cause padding issues. + if (!GV->hasSection() || GV->getAlignment() == 0) + GV->setAlignment(PrefAlign); + return GV->getAlignment(); + } + + return Align; +} + +/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that +/// we can determine, return it, otherwise return 0. If PrefAlign is specified, +/// and it is more than the alignment of the ultimate object, see if we can +/// increase the alignment of the ultimate object, making this check succeed. +unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, + const TargetData *TD) { + assert(V->getType()->isPointerTy() && + "getOrEnforceKnownAlignment expects a pointer!"); + unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD); + unsigned TrailZ = KnownZero.countTrailingOnes(); + + // Avoid trouble with rediculously large TrailZ values, such as + // those computed from a null pointer. + TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); + + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + + // LLVM doesn't support alignments larger than this currently. 
+ Align = std::min(Align, +Value::MaximumAlignment); + + if (PrefAlign > Align) + Align = enforceKnownAlignment(V, Align, PrefAlign, TD); + + // We don't need to make any adjustment. + return Align; +} + +///===---------------------------------------------------------------------===// +/// Dbg Intrinsic utilities +/// + +/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + StoreInst *SI, DIBuilder &Builder) { + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.Verify()) + return false; + + Instruction *DbgVal = NULL; + // If an argument is zero extended then use argument directly. The ZExt + // may be zapped by an optimization pass in future. + Argument *ExtendedArg = NULL; + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); + if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); + if (ExtendedArg) + DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI); + else + DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI); + + // Propagate any debug metadata from the store onto the dbg.value. + DebugLoc SIDL = SI->getDebugLoc(); + if (!SIDL.isUnknown()) + DbgVal->setDebugLoc(SIDL); + // Otherwise propagate debug metadata from dbg.declare. + else + DbgVal->setDebugLoc(DDI->getDebugLoc()); + return true; +} + +/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. 
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + LoadInst *LI, DIBuilder &Builder) { + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.Verify()) + return false; + + Instruction *DbgVal = + Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, + DIVar, LI); + + // Propagate any debug metadata from the store onto the dbg.value. + DebugLoc LIDL = LI->getDebugLoc(); + if (!LIDL.isUnknown()) + DbgVal->setDebugLoc(LIDL); + // Otherwise propagate debug metadata from dbg.declare. + else + DbgVal->setDebugLoc(DDI->getDebugLoc()); + return true; +} + +/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set +/// of llvm.dbg.value intrinsics. +bool llvm::LowerDbgDeclare(Function &F) { + DIBuilder DIB(*F.getParent()); + SmallVector<DbgDeclareInst *, 4> Dbgs; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + Dbgs.push_back(DDI); + } + if (Dbgs.empty()) + return false; + + for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(), + E = Dbgs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + bool RemoveDDI = true; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) + if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) + ConvertDebugDeclareToDebugValue(DDI, LI, DIB); + else + RemoveDDI = false; + if (RemoveDDI) + DDI->eraseFromParent(); + } + } + return true; +} + +/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the +/// alloca 'V', if any. 
+DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { + if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) + for (Value::use_iterator UI = DebugNode->use_begin(), + E = DebugNode->use_end(); UI != E; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + return DDI; + + return 0; +}
diff --git a/src/LLVM/lib/Transforms/Utils/LoopSimplify.cpp b/src/LLVM/lib/Transforms/Utils/LoopSimplify.cpp index 77d715d..cbd54a8 100644 --- a/src/LLVM/lib/Transforms/Utils/LoopSimplify.cpp +++ b/src/LLVM/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,7 +37,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loopsimplify" +#define DEBUG_TYPE "loop-simplify" #include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" @@ -47,6 +47,7 @@ #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -65,13 +66,16 @@ namespace { struct LoopSimplify : public LoopPass { static char ID; // Pass identification, replacement for typeid - LoopSimplify() : LoopPass(ID) {} + LoopSimplify() : LoopPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } // AA - If we have an alias analysis object to update, this is it, otherwise // this is null. AliasAnalysis *AA; LoopInfo *LI; DominatorTree *DT; + ScalarEvolution *SE; Loop *L; virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -86,8 +90,6 @@ AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. - AU.addPreserved<DominanceFrontier>(); - AU.addPreservedID(LCSSAID); } /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. @@ -106,10 +108,14 @@ } char LoopSimplify::ID = 0; -static RegisterPass<LoopSimplify> -X("loopsimplify", "Canonicalize natural loops", true); +INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... 
char &llvm::LoopSimplifyID = LoopSimplify::ID; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } @@ -122,6 +128,7 @@ LI = &getAnalysis<LoopInfo>(); AA = getAnalysisIfAvailable<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); Changed |= ProcessLoop(L, LPM); @@ -155,9 +162,8 @@ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(), E = BadPreds.end(); I != E; ++I) { - DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "; - WriteAsOperand(dbgs(), *I, false); - dbgs() << "\n"); + DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " + << (*I)->getName() << "\n"); // Inform each successor of each dead pred. for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) @@ -182,9 +188,8 @@ if (BI->isConditional()) { if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { - DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "; - WriteAsOperand(dbgs(), *I, false); - dbgs() << "\n"); + DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " + << (*I)->getName() << "\n"); BI->setCondition(ConstantInt::get(Cond->getType(), !L->contains(BI->getSuccessor(0)))); @@ -208,7 +213,7 @@ // predecessors from outside of the loop, split the edge now. 
SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); - + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), @@ -260,8 +265,9 @@ PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = PN->hasConstantValue(DT)) { + if (Value *V = SimplifyInstruction(PN, 0, DT)) { if (AA) AA->deleteValue(PN); + if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); } @@ -297,7 +303,7 @@ for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { Instruction *Inst = I++; // Skip debug info intrinsics. - if (ISA_DEBUG_INFO_INTRINSIC(Inst)) + if (isa<DbgInfoIntrinsic>(Inst)) continue; if (Inst == CI) continue; @@ -315,29 +321,30 @@ if (!FoldBranchToCommonDest(BI)) continue; // Success. The block is now dead, so remove it from the loop, - // update the dominator tree and dominance frontier, and delete it. + // update the dominator tree and delete it. + DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " + << ExitingBlock->getName() << "\n"); - DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "; - WriteAsOperand(dbgs(), ExitingBlock, false); - dbgs() << "\n"); + // If any reachable control flow within this loop has changed, notify + // ScalarEvolution. Currently assume the parent loop doesn't change + // (spliting edges doesn't count). If blocks, CFG edges, or other values + // in the parent loop change, then we need call to forgetLoop() for the + // parent instead. 
+ if (SE) + SE->forgetLoop(L); assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); - DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>(); DomTreeNode *Node = DT->getNode(ExitingBlock); const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = Node->getChildren(); while (!Children.empty()) { DomTreeNode *Child = Children.front(); DT->changeImmediateDominator(Child, Node->getIDom()); - if (DF) DF->changeImmediateDominator(Child->getBlock(), - Node->getIDom()->getBlock(), - DT); } DT->eraseNode(ExitingBlock); - if (DF) DF->removeBlock(ExitingBlock); BI->getSuccessor(0)->removePredecessor(ExitingBlock); BI->getSuccessor(1)->removePredecessor(ExitingBlock); @@ -374,11 +381,11 @@ // Split out the loop pre-header. BasicBlock *NewBB = SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), - this); + ".preheader", this); - DEBUG(dbgs() << "LoopSimplify: Creating pre-header "; - WriteAsOperand(dbgs(), NewBB, false); - dbgs() << "\n"); + NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); + DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName() + << "\n"); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. 
@@ -403,15 +410,24 @@ } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); - BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], - LoopBlocks.size(), - this); + BasicBlock *NewExitBB = 0; - DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "; - WriteAsOperand(dbgs(), NewBB, false); - dbgs() << "\n"); + if (Exit->isLandingPad()) { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0], + LoopBlocks.size()), + ".loopexit", ".nonloopexit", + this, NewBBs); + NewExitBB = NewBBs[0]; + } else { + NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], + LoopBlocks.size(), ".loopexit", + this); + } - return NewBB; + DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " + << NewExitBB->getName() << "\n"); + return NewExitBB; } /// AddBlockAndPredsToSet - Add the specified block, and all of its @@ -436,11 +452,11 @@ /// FindPHIToPartitionLoops - The first part of loop-nestification is to find a /// PHI node that tells us how to partition the loops. static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, - AliasAnalysis *AA) { + AliasAnalysis *AA, LoopInfo *LI) { for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = PN->hasConstantValue(DT)) { + if (Value *V = SimplifyInstruction(PN, 0, DT)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -470,23 +486,23 @@ if (&*BBI == SplitPreds[i]) return; } - + // If it isn't already after an outside block, move it after one. This is // always good as it makes the uncond branch from the outside block into a // fall-through. - + // Figure out *which* outside block to put this after. Prefer an outside // block that neighbors a BB actually in the loop. 
BasicBlock *FoundBB = 0; for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { Function::iterator BBI = SplitPreds[i]; - if (++BBI != NewBB->getParent()->end() && + if (++BBI != NewBB->getParent()->end() && L->contains(BBI)) { FoundBB = SplitPreds[i]; break; } } - + // If our heuristic for a *good* bb to place this after doesn't find // anything, just pick something. It's likely better than leaving it within // the loop. @@ -514,7 +530,7 @@ /// created. /// Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { - PHINode *PN = FindPHIToPartitionLoops(L, DT, AA); + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); if (PN == 0) return 0; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This @@ -533,15 +549,21 @@ DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + // If ScalarEvolution is around and knows anything about values in + // this loop, tell it to forget them, because we're about to + // substantially change it. + if (SE) + SE->forgetLoop(L); + BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], OuterLoopPreds.size(), - this); + ".outer", this); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L); - + // Create the new outer loop. Loop *NewOuter = new Loop(); @@ -622,17 +644,21 @@ std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; + + // Indirectbr edges cannot be split, so we must fail if we find one. + if (isa<IndirectBrInst>(P->getTerminator())) + return 0; + if (P != Preheader) BackedgeBlocks.push_back(P); } // Create and insert the new backedge block... 
BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), - F); + Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); - DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "; - WriteAsOperand(dbgs(), BEBlock, false); - dbgs() << "\n"); + DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " + << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; @@ -642,9 +668,8 @@ // the backedge block which correspond to any PHI nodes in the header block. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); - PHINode *NewPN = PHINode::Create(PN->getType(), - BETerminator); - NewPN->reserveOperandSpace(BackedgeBlocks.size()); + PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), + PN->getName()+".be", BETerminator); if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the @@ -708,8 +733,6 @@ // Update dominator information DT->splitBlock(BEBlock); - if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) - DF->splitBlock(BEBlock); return BEBlock; } @@ -731,6 +754,7 @@ } assert(HasIndBrPred && "LoopSimplify has no excuse for missing loop header info!"); + (void)HasIndBrPred; } // Indirectbr can interfere with exit block canonicalization. @@ -738,12 +762,15 @@ bool HasIndBrExiting = false; SmallVector<BasicBlock*, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { HasIndBrExiting = true; break; } + } + assert(HasIndBrExiting && "LoopSimplify has no excuse for missing exit block info!"); + (void)HasIndBrExiting; } }
diff --git a/src/LLVM/lib/Transforms/Utils/LoopUnroll.cpp b/src/LLVM/lib/Transforms/Utils/LoopUnroll.cpp new file mode 100644 index 0000000..62e4fa2 --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/LoopUnroll.cpp
@@ -0,0 +1,434 @@
+//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches. This will be corrected in the future.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+using namespace llvm;
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+
+/// RemapInstruction - Convert the instruction operands from referencing the
+/// current values into those specified by VMap.
+static inline void RemapInstruction(Instruction *I,
+                                    ValueToValueMapTy &VMap) {
+  for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+    Value *Op = I->getOperand(op);
+    ValueToValueMapTy::iterator It = VMap.find(Op);
+    if (It != VMap.end())
+      I->setOperand(op, It->second);
+  }
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+      if (It != VMap.end())
+        PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+    }
+  }
+}
+
+/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
+/// only has one predecessor, and that predecessor only has one successor.
+/// The LoopInfo Analysis that is passed will be kept consistent.
+/// Returns the new combined block, or null if BB was left untouched. LPM may
+/// be null, in which case ScalarEvolution is not notified.
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
+                                            LPPassManager *LPM) {
+  // Merge basic blocks into their predecessor if there is only one distinct
+  // pred, and if there is only one distinct successor of the predecessor, and
+  // if there are no PHI nodes.
+  BasicBlock *OnlyPred = BB->getSinglePredecessor();
+  if (!OnlyPred) return 0;
+
+  if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
+    return 0;
+
+  DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
+
+  // Resolve any PHI nodes at the start of the block. They are all
+  // guaranteed to have exactly one entry if they exist, unless there are
+  // multiple duplicate (but guaranteed to be equal) entries for the
+  // incoming edges. This occurs when there are multiple edges from
+  // OnlyPred to OnlySucc.
+  FoldSingleEntryPHINodes(BB);
+
+  // Delete the unconditional branch from the predecessor...
+  OnlyPred->getInstList().pop_back();
+
+  // Make all PHI nodes that referred to BB now refer to Pred as their
+  // source...
+  BB->replaceAllUsesWith(OnlyPred);
+
+  // Move all definitions in the successor to the predecessor...
+  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+  std::string OldName = BB->getName();
+
+  // Erase basic block from the function...
+
+  // ScalarEvolution holds references to loop exit blocks. LPM is allowed to
+  // be null (see UnrollLoop), so only query it when it is present.
+  if (LPM) {
+    if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) {
+      if (Loop *L = LI->getLoopFor(BB))
+        SE->forgetLoop(L);
+    }
+  }
+  LI->removeBlock(BB);
+  BB->eraseFromParent();
+
+  // Inherit predecessor's name if it exists...
+  if (!OldName.empty() && !OnlyPred->hasName())
+    OnlyPred->setName(OldName);
+
+  return OnlyPred;
+}
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
+/// fails when the loop has no preheader or no latch block, when the latch block
+/// is not terminated by a conditional branch instruction, or when the address
+/// of the header block is taken. However, if the trip count (and multiple) are
+/// not known, loop unrolling will mostly produce more code that is no faster.
+///
+/// Count must be nonzero. If TripCount is nonzero (i.e. known) and Count
+/// exceeds it, Count is clamped to TripCount.
+///
+/// TripCount is generally defined as the number of times the loop header
+/// executes. UnrollLoop relaxes the definition to permit early exits: here
+/// TripCount is the iteration on which control exits LatchBlock if no early
+/// exits were taken. Note that UnrollLoop assumes that the loop counter test
+/// terminates LatchBlock in order to remove unnecessary instances of the
+/// test. In other words, control may exit the loop prior to TripCount
+/// iterations via an early branch, but control may not exit the loop from the
+/// LatchBlock's terminator prior to TripCount iterations.
+///
+/// Similarly, TripMultiple divides the number of times that the LatchBlock may
+/// execute without exiting the loop.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
+/// removed from the LoopPassManager as well. LPM can also be NULL.
+///
+/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
+/// available it must also preserve those analyses.
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+                      unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) {
+    DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
+    return false;
+  }
+
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  if (!LatchBlock) {
+    DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
+    return false;
+  }
+
+  BasicBlock *Header = L->getHeader();
+  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+  if (!BI || BI->isUnconditional()) {
+    // The loop-rotate pass can be helpful to avoid this in many cases.
+    DEBUG(dbgs() <<
+          " Can't unroll; loop not terminated by a conditional branch.\n");
+    return false;
+  }
+
+  if (Header->hasAddressTaken()) {
+    // The loop-rotate pass can be helpful to avoid this in many cases.
+    DEBUG(dbgs() <<
+          " Won't unroll loop: address of header block is taken.\n");
+    return false;
+  }
+
+  // Notify ScalarEvolution that the loop will be substantially changed,
+  // if not outright eliminated. LPM is allowed to be null, in which case no
+  // analyses are available and SE stays null for the rest of the function.
+  ScalarEvolution *SE =
+    LPM ? LPM->getAnalysisIfAvailable<ScalarEvolution>() : 0;
+  if (SE)
+    SE->forgetLoop(L);
+
+  if (TripCount != 0)
+    DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
+  if (TripMultiple != 1)
+    DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+
+  // Effectively "DCE" unrolled iterations that are beyond the tripcount
+  // and will never be executed.
+  if (TripCount != 0 && Count > TripCount)
+    Count = TripCount;
+
+  assert(Count > 0);
+  assert(TripMultiple > 0);
+  assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+  // Are we eliminating the loop control altogether?
+  bool CompletelyUnroll = Count == TripCount;
+
+  // If we know the trip count, we know the multiple...
+  unsigned BreakoutTrip = 0;
+  if (TripCount != 0) {
+    BreakoutTrip = TripCount % Count;
+    TripMultiple = 0;
+  } else {
+    // Figure out what multiple to use.
+    BreakoutTrip = TripMultiple =
+      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+  }
+
+  if (CompletelyUnroll) {
+    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+          << " with trip count " << TripCount << "!\n");
+  } else {
+    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
+          << " by " << Count);
+    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+    } else if (TripMultiple != 1) {
+      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+    }
+    DEBUG(dbgs() << "!\n");
+  }
+
+  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+
+  // For the first iteration of the loop, we should use the precloned values for
+  // PHI nodes. Insert associations now.
+  ValueToValueMapTy LastValueMap;
+  std::vector<PHINode*> OrigPHINode;
+  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+    OrigPHINode.push_back(cast<PHINode>(I));
+  }
+
+  std::vector<BasicBlock*> Headers;
+  std::vector<BasicBlock*> Latches;
+  Headers.push_back(Header);
+  Latches.push_back(LatchBlock);
+
+  // The current on-the-fly SSA update requires blocks to be processed in
+  // reverse postorder so that LastValueMap contains the correct value at each
+  // exit.
+  LoopBlocksDFS DFS(L);
+  DFS.perform(LI);
+
+  // Stash the DFS iterators before adding blocks to the loop.
+  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
+  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+
+  for (unsigned It = 1; It != Count; ++It) {
+    std::vector<BasicBlock*> NewBlocks;
+
+    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+      ValueToValueMapTy VMap;
+      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+      Header->getParent()->getBasicBlockList().push_back(New);
+
+      // Loop over all of the PHI nodes in the block, changing them to use the
+      // incoming values from the previous block.
+      if (*BB == Header)
+        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
+          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+            if (It > 1 && L->contains(InValI))
+              InVal = LastValueMap[InValI];
+          VMap[OrigPHINode[i]] = InVal;
+          New->getInstList().erase(NewPHI);
+        }
+
+      // Update our running map of newest clones
+      LastValueMap[*BB] = New;
+      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+           VI != VE; ++VI)
+        LastValueMap[VI->first] = VI->second;
+
+      L->addBasicBlockToLoop(New, LI->getBase());
+
+      // Add phi entries for newly created values to all exit blocks.
+      for (succ_iterator SI = succ_begin(*BB), SEnd = succ_end(*BB);
+           SI != SEnd; ++SI) {
+        if (L->contains(*SI))
+          continue;
+        for (BasicBlock::iterator BBI = (*SI)->begin();
+             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+          Value *Incoming = phi->getIncomingValueForBlock(*BB);
+          ValueToValueMapTy::iterator LVI = LastValueMap.find(Incoming);
+          if (LVI != LastValueMap.end())
+            Incoming = LVI->second;
+          phi->addIncoming(Incoming, New);
+        }
+      }
+      // Keep track of new headers and latches as we create them, so that
+      // we can insert the proper branches later.
+      if (*BB == Header)
+        Headers.push_back(New);
+      if (*BB == LatchBlock)
+        Latches.push_back(New);
+
+      NewBlocks.push_back(New);
+    }
+
+    // Remap all instructions in the most recent iteration
+    for (unsigned i = 0; i < NewBlocks.size(); ++i)
+      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+           E = NewBlocks[i]->end(); I != E; ++I)
+        ::RemapInstruction(I, LastValueMap);
+  }
+
+  // Loop over the PHI nodes in the original block, setting incoming values.
+  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+    PHINode *PN = OrigPHINode[i];
+    if (CompletelyUnroll) {
+      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+      Header->getInstList().erase(PN);
+    }
+    else if (Count > 1) {
+      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+      // If this value was defined in the loop, take the value defined by the
+      // last iteration of the loop.
+      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+        if (L->contains(InValI))
+          InVal = LastValueMap[InVal];
+      }
+      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
+      PN->addIncoming(InVal, Latches.back());
+    }
+  }
+
+  // Now that all the basic blocks for the unrolled iterations are in place,
+  // set up the branches to connect them.
+  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+    // The original branch was replicated in each unrolled iteration.
+    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+    // The branch destination.
+    unsigned j = (i + 1) % e;
+    BasicBlock *Dest = Headers[j];
+    bool NeedConditional = true;
+
+    // For a complete unroll, make the last iteration end with a branch
+    // to the exit block.
+    if (CompletelyUnroll && j == 0) {
+      Dest = LoopExit;
+      NeedConditional = false;
+    }
+
+    // If we know the trip count or a multiple of it, we can safely use an
+    // unconditional branch for some iterations.
+    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+      NeedConditional = false;
+    }
+
+    if (NeedConditional) {
+      // Update the conditional branch's successor for the following
+      // iteration.
+      Term->setSuccessor(!ContinueOnTrue, Dest);
+    } else {
+      // Remove phi operands at this loop exit
+      if (Dest != LoopExit) {
+        BasicBlock *BB = Latches[i];
+        for (succ_iterator SI = succ_begin(BB), SEnd = succ_end(BB);
+             SI != SEnd; ++SI) {
+          if (*SI == Headers[i])
+            continue;
+          for (BasicBlock::iterator BBI = (*SI)->begin();
+               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
+            Phi->removeIncomingValue(BB, false);
+          }
+        }
+      }
+      // Replace the conditional branch with an unconditional one.
+      BranchInst::Create(Dest, Term);
+      Term->eraseFromParent();
+    }
+  }
+
+  // Merge adjacent basic blocks, if possible.
+  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+    if (Term->isUnconditional()) {
+      BasicBlock *Dest = Term->getSuccessor(0);
+      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
+        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+    }
+  }
+
+  // FIXME: Reconstruct dom info, because it is not preserved properly.
+  // Incrementally updating domtree after loop unrolling would be easy.
+  if (LPM)
+    if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
+      DT->runOnFunction(*L->getHeader()->getParent());
+
+  // Simplify any new induction variables in the partially unrolled loop.
+  if (SE && !CompletelyUnroll) {
+    SmallVector<WeakVH, 16> DeadInsts;
+    simplifyLoopIVs(L, SE, LPM, DeadInsts);
+
+    // Aggressively clean up dead instructions that simplifyLoopIVs already
+    // identified. Any remaining should be cleaned up below.
+    while (!DeadInsts.empty())
+      if (Instruction *Inst =
+          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+        RecursivelyDeleteTriviallyDeadInstructions(Inst);
+  }
+
+  // At this point, the code is well formed. We now do a quick sweep over the
+  // inserted code, doing constant propagation and dead code elimination as we
+  // go.
+  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
+       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
+    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+      Instruction *Inst = I++;
+
+      if (isInstructionTriviallyDead(Inst))
+        (*BB)->getInstList().erase(Inst);
+      else if (Value *V = SimplifyInstruction(Inst))
+        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+          Inst->replaceAllUsesWith(V);
+          (*BB)->getInstList().erase(Inst);
+        }
+    }
+
+  NumCompletelyUnrolled += CompletelyUnroll;
+  ++NumUnrolled;
+  // Remove the loop from the LoopPassManager if it's completely removed.
+  if (CompletelyUnroll && LPM != NULL)
+    LPM->deleteLoopFromQueue(L);
+
+  return true;
+}
diff --git a/src/LLVM/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/src/LLVM/lib/Transforms/Utils/LowerExpectIntrinsic.cpp new file mode 100644 index 0000000..61ab3f6 --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,188 @@
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled");
+
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+                   cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+                   cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+
+  /// LowerExpectIntrinsic - Function pass that converts uses of the
+  /// llvm.expect intrinsic on branch and switch conditions into
+  /// "branch_weights" profile metadata and then strips the intrinsic calls.
+  class LowerExpectIntrinsic : public FunctionPass {
+
+    bool HandleSwitchExpect(SwitchInst *SI);
+
+    bool HandleIfExpect(BranchInst *BI);
+
+  public:
+    static char ID;
+    LowerExpectIntrinsic() : FunctionPass(ID) {
+      initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F);
+  };
+}
+
+
+/// HandleSwitchExpect - If the condition of SI is a call to llvm.expect
+/// with a constant expected value, attach "branch_weights" metadata that
+/// favors the matching case and make SI switch on the intrinsic's first
+/// argument instead. Returns true if SI was updated.
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+  CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+  if (!CI)
+    return false;
+
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  Value *ArgValue = CI->getArgOperand(0);
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  Type *Int32Ty = Type::getInt32Ty(Context);
+
+  unsigned caseNo = SI->findCaseValue(ExpectedValue);
+  std::vector<Value *> Vec;
+  unsigned n = SI->getNumCases();
+  Vec.resize(n + 1); // +1 for MDString
+
+  Vec[0] = MDString::get(Context, "branch_weights");
+  for (unsigned i = 0; i < n; ++i) {
+    Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight);
+  }
+
+  MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
+  SI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+  SI->setCondition(ArgValue);
+  return true;
+}
+
+
+/// HandleIfExpect - If BI is a conditional branch on
+/// "icmp ne (call llvm.expect(x, C)), ..." with constant C, attach
+/// "branch_weights" metadata (successor 0 is the likely one when C is 1)
+/// and make the compare use x directly. Returns true if BI was updated.
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+  if (BI->isUnconditional())
+    return false;
+
+  // Handle non-optimized IR code like:
+  //   %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+  //   %tobool = icmp ne i64 %expval, 0
+  //   br i1 %tobool, label %if.then, label %if.end
+
+  // NOTE(review): only the predicate and the left-hand operand are checked;
+  // the right-hand operand of the compare is not verified to be zero.
+  ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+    return false;
+
+  CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+  if (!CI)
+    return false;
+
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  Value *ArgValue = CI->getArgOperand(0);
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  Type *Int32Ty = Type::getInt32Ty(Context);
+  bool Likely = ExpectedValue->isOne();
+
+  // If expect value is equal to 1 it means that we are more likely to take
+  // branch 0, in other case more likely is branch 1.
+  Value *Ops[] = {
+    MDString::get(Context, "branch_weights"),
+    ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight),
+    ConstantInt::get(Int32Ty, Likely ? UnlikelyBranchWeight : LikelyBranchWeight)
+  };
+
+  MDNode *WeightsNode = MDNode::get(Context, Ops);
+  BI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+  CmpI->setOperand(0, ArgValue);
+  return true;
+}
+
+
+/// runOnFunction - Annotate every branch/switch terminator in F that is
+/// controlled by llvm.expect, then erase all llvm.expect calls, replacing each
+/// with its first argument. Returns true if F was modified.
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+  bool Changed = false;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+    BasicBlock *BB = I++;
+
+    // Create "branch_weights" metadata.
+    if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+      if (HandleIfExpect(BI)) {
+        IfHandled++;
+        Changed = true;
+      }
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+      if (HandleSwitchExpect(SI)) {
+        IfHandled++;
+        Changed = true;
+      }
+    }
+
+    // remove llvm.expect intrinsics.
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+         BI != BE; ) {
+      CallInst *CI = dyn_cast<CallInst>(BI++);
+      if (!CI)
+        continue;
+
+      Function *Fn = CI->getCalledFunction();
+      if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+        Value *Exp = CI->getArgOperand(0);
+        CI->replaceAllUsesWith(Exp);
+        CI->eraseFromParent();
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+                "Intrinsics", false, false)
+
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+  return new LowerExpectIntrinsic();
+}
diff --git a/src/LLVM/lib/Transforms/Utils/LowerInvoke.cpp b/src/LLVM/lib/Transforms/Utils/LowerInvoke.cpp new file mode 100644 index 0000000..c96c8fc --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,608 @@
+//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support gives the program the ability to execute
+// any program which does not "throw an exception", by turning 'invoke'
+// instructions into calls and by turning 'unwind' instructions into calls to
+// abort(). If the program does dynamically use the unwind instruction, the
+// program will abort (the code inserted here does not print a message).
+//
+// 'Expensive' exception handling support gives the full exception handling
+// support to the program at the cost of making the 'invoke' instruction
+// really expensive. It basically inserts setjmp/longjmp calls to emulate the
+// exception handling as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
+//
+// Note that after this pass runs the CFG is not entirely accurate (exceptional
+// control flow edges are not correct anymore) so only very simple things should
+// be done after the lowerinvoke pass has run (like generation of native code).
+// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't
+// support the invoke instruction yet" lowering pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerinvoke"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include <csetjmp>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
+
+namespace {
+  class LowerInvoke : public FunctionPass {
+    // Used for both models: the module's declaration of abort().
+    Constant *AbortFn;
+
+    // Used for expensive EH support; only initialized when it is enabled.
+    StructType *JBLinkTy;
+    GlobalVariable *JBListHead;
+    Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
+    bool useExpensiveEHSupport;
+
+    // We peek in TLI to grab the target's jmp_buf size and alignment. May be
+    const TargetLowering *TLI; // null, in which case a default size is used.
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit LowerInvoke(const TargetLowering *tli = NULL,
+                         bool useExpensiveEHSupport = ExpensiveEHSupport)
+      : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
+        TLI(tli) {
+      initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+    }
+    bool doInitialization(Module &M);
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      // This is a cluster of orthogonal Transforms
+      AU.addPreserved("mem2reg");
+      AU.addPreservedID(LowerSwitchID);
+    }
+
+  private:
+    bool insertCheapEHSupport(Function &F);
+    void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
+    void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+                                AllocaInst *InvokeNum, AllocaInst *StackPtr,
+                                SwitchInst *CatchSwitch);
+    bool insertExpensiveEHSupport(Function &F);
+  };
+}
+
+char LowerInvoke::ID = 0;
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+                "Lower invoke and unwind, for unwindless code generators",
+                false, false)
+
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+  return new LowerInvoke(TLI, ExpensiveEHSupport);
+}
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+                                          bool useExpensiveEHSupport) {
+  return new LowerInvoke(TLI, useExpensiveEHSupport);
+}
+
+// doInitialization - Declare abort() in the module and, for expensive EH, the
+// jump-buffer list type/global and the intrinsics used. Always returns true.
+bool LowerInvoke::doInitialization(Module &M) {
+  Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+  if (useExpensiveEHSupport) {
+    // Insert a type for the linked list of jump buffers.
+    unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+    JBSize = JBSize ? JBSize : 200; // fall back to 200 entries when unknown
+    Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+    JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty");
+    Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+    JBLinkTy->setBody(Elts);
+
+    Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
+
+    // Now that we've done that, insert the jmpbuf list head global, unless it
+    // already exists.
+    if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
+      JBListHead = new GlobalVariable(M, PtrJBList, false,
+                                      GlobalValue::LinkOnceAnyLinkage,
+                                      Constant::getNullValue(PtrJBList),
+                                      "llvm.sjljeh.jblist");
+    }
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                         !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+    SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+    // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+    LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
+    StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+    StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+  }
+
+  // We need the 'abort' function for both models.
+  AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
+                                  (Type *)0);
+  return true;
+}
+
+bool LowerInvoke::insertCheapEHSupport(Function &F) {
+  bool Changed = false; // becomes true once any invoke/unwind is rewritten
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+      SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+      // Insert a normal call instruction...
+      CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+                                           CallArgs, "", II);
+      NewCall->takeName(II);
+      NewCall->setCallingConv(II->getCallingConv());
+      NewCall->setAttributes(II->getAttributes());
+      NewCall->setDebugLoc(II->getDebugLoc());
+      II->replaceAllUsesWith(NewCall);
+
+      // Insert an unconditional branch to the normal destination.
+      BranchInst::Create(II->getNormalDest(), II);
+
+      // Remove any PHI node entries from the exception destination.
+      II->getUnwindDest()->removePredecessor(BB);
+
+      // Remove the invoke instruction now.
+      BB->getInstList().erase(II);
+
+      ++NumInvokes; Changed = true;
+    } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+      // Insert a call to abort()
+      CallInst::Create(AbortFn, "", UI)->setTailCall();
+
+      // Insert a return instruction. This really should be a "barrier", as it
+      // is unreachable.
+      ReturnInst::Create(F.getContext(),
+                         F.getReturnType()->isVoidTy() ?
+                         0 : Constant::getNullValue(F.getReturnType()), UI);
+
+      // Remove the unwind instruction now.
+      BB->getInstList().erase(UI);
+
+      ++NumUnwinds; Changed = true;
+    }
+  return Changed;
+}
+
+/// rewriteExpensiveInvoke - Insert code and hack the function to replace the
+/// specified invoke instruction with a call.
+void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+                                         AllocaInst *InvokeNum,
+                                         AllocaInst *StackPtr,
+                                         SwitchInst *CatchSwitch) {
+  ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                            InvokeNo);
+
+  // If the unwind edge has phi nodes, split the edge.
+  if (isa<PHINode>(II->getUnwindDest()->begin())) {
+    SplitCriticalEdge(II, 1, this);
+
+    // If there are any phi nodes left, they must have a single predecessor.
+    while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+      PN->replaceAllUsesWith(PN->getIncomingValue(0));
+      PN->eraseFromParent();
+    }
+  }
+
+  // Insert a store of the invoke num before the invoke and store zero into the
+  // location afterward.
+  new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
+
+  // Insert a store of the stack ptr before the invoke, so we can restore it
+  // later in the exception case.
+  CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
+  new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
+
+  BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt();
+  // nonvolatile.
+  new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
+                InvokeNum, false, NI);
+
+  Instruction* StackPtrLoad =
+    new LoadInst(StackPtr, "stackptr.restore", true,
+                 II->getUnwindDest()->getFirstInsertionPt());
+  CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
+
+  // Add a switch case to our unwind block.
+  CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
+
+  // Insert a normal call instruction.
+  SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+  CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+                                       CallArgs, "", II);
+  NewCall->takeName(II);
+  NewCall->setCallingConv(II->getCallingConv());
+  NewCall->setAttributes(II->getAttributes());
+  NewCall->setDebugLoc(II->getDebugLoc());
+  II->replaceAllUsesWith(NewCall);
+
+  // Replace the invoke with an uncond branch.
+  BranchInst::Create(II->getNormalDest(), NewCall->getParent());
+  II->eraseFromParent();
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+  if (!LiveBBs.insert(BB).second) return; // already been here.
+
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges. Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge.
This process also splits all critical edges +// coming out of invoke's. +void LowerInvoke:: +splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { + // First step, split all critical edges from invoke instructions. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + InvokeInst *II = Invokes[i]; + SplitCriticalEdge(II, 0, this); + SplitCriticalEdge(II, 1, this); + assert(!isa<PHINode>(II->getNormalDest()) && + !isa<PHINode>(II->getUnwindDest()) && + "critical edge splitting left single entry phi nodes?"); + } + + Function *F = Invokes.back()->getParent()->getParent(); + + // To avoid having to handle incoming arguments specially, we lower each arg + // to a copy instruction in the entry block. This ensures that the argument + // value itself cannot be live across the entry block. + BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); + while (isa<AllocaInst>(AfterAllocaInsertPt) && + isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) + ++AfterAllocaInsertPt; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI) { + Type *Ty = AI->getType(); + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + // Set the operand of the instructions back to the AllocaInst. + EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. 
+ CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, + // we're replacing it here with the same value it was constructed with. + // We do this because the above replaceAllUsesWith() clobbered the + // operand, but we want this one to remain. + NC->setOperand(0, AI); + } + } + + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->use_back())->getParent() == BB && + !isa<PHINode>(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector<Instruction*,16> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User->getParent() != BB || isa<PHINode>(User)) + Users.push_back(User); + } + + // Scan all of the uses and see if the live range is live across an unwind + // edge. If we find a use live across an invoke edge, create an alloca + // and spill the value. + std::set<InvokeInst*> InvokesWithStoreInserted; + + // Find all of the blocks that this value is live in. 
+ std::set<BasicBlock*> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it. + if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } +} + +bool LowerInvoke::insertExpensiveEHSupport(Function &F) { + SmallVector<ReturnInst*,16> Returns; + SmallVector<UnwindInst*,16> Unwinds; + SmallVector<InvokeInst*,16> Invokes; + UnreachableInst* UnreachablePlaceholder = 0; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + // Remember all return instructions in case we insert an invoke into this + // function. + Returns.push_back(RI); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Invokes.push_back(II); + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + Unwinds.push_back(UI); + } + + if (Unwinds.empty() && Invokes.empty()) return false; + + NumInvokes += Invokes.size(); + NumUnwinds += Unwinds.size(); + + // TODO: This is not an optimal way to do this. 
In particular, this always + // inserts setjmp calls into the entries of functions with invoke instructions + // even though there are possibly paths through the function that do not + // execute any invokes. In particular, for functions with early exits, e.g. + // the 'addMove' method in hexxagon, it would be nice to not have to do the + // setjmp stuff on the early exit path. This requires a bit of dataflow, but + // would not be too hard to do. + + // If we have an invoke instruction, insert a setjmp that dominates all + // invokes. After the setjmp, use a cond branch that goes to the original + // code path on zero, and to a designated 'catch' block of nonzero. + Value *OldJmpBufPtr = 0; + if (!Invokes.empty()) { + // First thing we need to do is scan the whole function for values that are + // live across unwind edges. Each value that is live across an unwind edge + // we spill into a stack location, guaranteeing that there is nothing live + // across the unwind edge. This process also splits all critical edges + // coming out of invoke's. + splitLiveRangesLiveAcrossInvokes(Invokes); + + BasicBlock *EntryBB = F.begin(); + + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an + // alloca because the value needs to be live across invokes. + unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; + AllocaInst *JmpBuf = + new AllocaInst(JBLinkTy, 0, Align, + "jblink", F.begin()->begin()); + + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) }; + OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf", + EntryBB->getTerminator()); + + // Copy the JBListHead to the alloca. + Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, + EntryBB->getTerminator()); + new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator()); + + // Add the new jumpbuf to the list. 
+ new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator()); + + // Create the catch block. The catch block is basically a big switch + // statement that goes to all of the invoke catch blocks. + BasicBlock *CatchBB = + BasicBlock::Create(F.getContext(), "setjmp.catch", &F); + + // Create an alloca which keeps track of the stack pointer before every + // invoke, this allows us to properly restore the stack pointer after + // long jumping. + AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0, + "stackptr", EntryBB->begin()); + + // Create an alloca which keeps track of which invoke is currently + // executing. For normal calls it contains zero. + AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0, + "invokenum",EntryBB->begin()); + new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + InvokeNum, true, EntryBB->getTerminator()); + + // Insert a load in the Catch block, and a switch on its value. By default, + // we go to a block that just does an unwind (which is the correct action + // for a standard call). We insert an unreachable instruction here and + // modify the block to jump to the correct unwinding pad later. + BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); + UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB); + + Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); + SwitchInst *CatchSwitch = + SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB); + + // Now that things are set up, insert the setjmp call itself. + + // Split the entry block to insert the conditional branch for the setjmp. 
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "setjmp.cont"); + + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); + Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf", + EntryBB->getTerminator()); + JmpBufPtr = new BitCastInst(JmpBufPtr, + Type::getInt8PtrTy(F.getContext()), + "tmp", EntryBB->getTerminator()); + Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret", + EntryBB->getTerminator()); + + // Compare the return value to zero. + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, SJRet, + Constant::getNullValue(SJRet->getType()), + "notunwind"); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. + BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB); + + // At this point, we are all set up, rewrite each invoke instruction. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch); + } + + // We know that there is at least one unwind. + + // Create three new blocks, the block to load the jmpbuf ptr and compare + // against null, the block to do the longjmp, and the error block for if it + // is null. Add them at the end of the function because they are not hot. + BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(), + "dounwind", &F); + BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F); + BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F); + + // If this function contains an invoke, restore the old jumpbuf ptr. + Value *BufPtr; + if (OldJmpBufPtr) { + // Before the return, insert a copy from the saved value to the new value. 
+ BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler); + new StoreInst(BufPtr, JBListHead, UnwindHandler); + } else { + BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler); + } + + // Load the JBList, if it's null, then there was no catch! + Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr, + Constant::getNullValue(BufPtr->getType()), + "notnull"); + BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler); + + // Create the block to do the longjmp. + // Get a pointer to the jmpbuf and longjmp. + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; + Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock); + Idx[0] = new BitCastInst(Idx[0], + Type::getInt8PtrTy(F.getContext()), + "tmp", UnwindBlock); + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + CallInst::Create(LongJmpFn, Idx, "", UnwindBlock); + new UnreachableInst(F.getContext(), UnwindBlock); + + // Set up the term block ("throw without a catch"). + new UnreachableInst(F.getContext(), TermBlock); + + // Insert a call to abort() + CallInst::Create(AbortFn, "", + TermBlock->getTerminator())->setTailCall(); + + + // Replace all unwinds with a branch to the unwind handler. + for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { + BranchInst::Create(UnwindHandler, Unwinds[i]); + Unwinds[i]->eraseFromParent(); + } + + // Replace the inserted unreachable with a branch to the unwind handler. + if (UnreachablePlaceholder) { + BranchInst::Create(UnwindHandler, UnreachablePlaceholder); + UnreachablePlaceholder->eraseFromParent(); + } + + // Finally, for any returns from this function, if this function contains an + // invoke, restore the old jmpbuf pointer to its input value. 
+ if (OldJmpBufPtr) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *R = Returns[i]; + + // Before the return, insert a copy from the saved value to the new value. + Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R); + new StoreInst(OldBuf, JBListHead, true, R); + } + } + + return true; +} + +bool LowerInvoke::runOnFunction(Function &F) { + if (useExpensiveEHSupport) + return insertExpensiveEHSupport(F); + else + return insertCheapEHSupport(F); +}
diff --git a/src/LLVM/lib/Transforms/Utils/LowerSwitch.cpp b/src/LLVM/lib/Transforms/Utils/LowerSwitch.cpp index 491e28e..686178c 100644 --- a/src/LLVM/lib/Transforms/Utils/LowerSwitch.cpp +++ b/src/LLVM/lib/Transforms/Utils/LowerSwitch.cpp
@@ -29,12 +29,13 @@ namespace { /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch - /// instructions. Note that this cannot be a BasicBlock pass because it - /// modifies the CFG! + /// instructions. class LowerSwitch : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid - LowerSwitch() : FunctionPass(ID) {} + LowerSwitch() : FunctionPass(ID) { + initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); @@ -42,7 +43,7 @@ // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); AU.addPreserved("mem2reg"); - //AU.addPreservedID(LowerInvokePassID); + AU.addPreservedID(LowerInvokePassID); } struct CaseRange { @@ -50,8 +51,7 @@ Constant* High; BasicBlock* BB; - CaseRange() : Low(0), High(0), BB(0) { } - CaseRange(Constant* low, Constant* high, BasicBlock* bb) : + CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) : Low(low), High(high), BB(bb) { } }; @@ -81,10 +81,10 @@ } char LowerSwitch::ID = 0; -static RegisterPass<LowerSwitch> -X("lowerswitch", "Lower SwitchInst's to branches"); +INITIALIZE_PASS(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::LowerSwitchID = LowerSwitch::ID; // createLowerSwitchPass - Interface to this file... FunctionPass *llvm::createLowerSwitchPass() { @@ -109,7 +109,8 @@ // operator<< - Used for debugging purposes. // static raw_ostream& operator<<(raw_ostream &O, - const LowerSwitch::CaseVector &C) ATTRIBUTE_USED; + const LowerSwitch::CaseVector &C) + LLVM_ATTRIBUTE_USED; static raw_ostream& operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) { O << "["; @@ -154,12 +155,12 @@ // Create a new node that checks if the value is < pivot. Go to the // left branch if it is and right branch if not. 
Function* F = OrigBlock->getParent(); - BasicBlock* NewNode = BasicBlock::Create(Val->getContext()); + BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewNode); ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, - Val, Pivot.Low); + Val, Pivot.Low, "Pivot"); NewNode->getInstList().push_back(Comp); BranchInst::Create(LBranch, RBranch, Comp, NewNode); return NewNode; @@ -176,7 +177,7 @@ BasicBlock* Default) { Function* F = OrigBlock->getParent(); - BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext()); + BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewLeaf); @@ -185,22 +186,26 @@ if (Leaf.Low == Leaf.High) { // Make the seteq instruction... Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, - Leaf.Low); + Leaf.Low, "SwitchLeaf"); } else { // Make range comparison if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) { // Val >= Min && Val <= Hi --> Val <= Hi - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf"); } else if (cast<ConstantInt>(Leaf.Low)->isZero()) { // Val >= 0 && Val <= Hi --> Val <=u Hi - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf"); } else { // Emit V-Lo <=u Hi-Lo Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo, + Val->getName()+".off", NewLeaf); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); - Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf"); } } @@ -272,11 +277,11 @@ BasicBlock *CurBlock = SI->getParent(); BasicBlock *OrigBlock = CurBlock; Function *F = 
CurBlock->getParent(); - Value *Val = SI->getOperand(0); // The value we are switching on... + Value *Val = SI->getCondition(); // The value we are switching on... BasicBlock* Default = SI->getDefaultDest(); // If there is only the default destination, don't bother with the code below. - if (SI->getNumOperands() == 2) { + if (SI->getNumCases() == 1) { BranchInst::Create(SI->getDefaultDest(), CurBlock); CurBlock->getInstList().erase(SI); return; @@ -284,7 +289,7 @@ // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. - BasicBlock* NewDefault = BasicBlock::Create(SI->getContext()); + BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); F->getBasicBlockList().insert(Default, NewDefault); BranchInst::Create(Default, NewDefault);
diff --git a/src/LLVM/lib/Transforms/Utils/Mem2Reg.cpp b/src/LLVM/lib/Transforms/Utils/Mem2Reg.cpp new file mode 100644 index 0000000..f4ca81a --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,90 @@ +//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is a simple pass wrapper around the PromoteMemToReg function call +// exposed by the Utils library. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mem2reg" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPromoted, "Number of alloca's promoted"); + +namespace { + struct PromotePass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + PromotePass() : FunctionPass(ID) { + initializePromotePassPass(*PassRegistry::getPassRegistry()); + } + + // runOnFunction - To run this pass, first we calculate the alloca + // instructions that are safe for promotion, then we promote each one. 
+ // + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); + // This is a cluster of orthogonal Transforms + AU.addPreserved<UnifyFunctionExitNodes>(); + AU.addPreservedID(LowerSwitchID); + AU.addPreservedID(LowerInvokePassID); + } + }; +} // end of anonymous namespace + +char PromotePass::ID = 0; +INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register", + false, false) + +bool PromotePass::runOnFunction(Function &F) { + std::vector<AllocaInst*> Allocas; + + BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + + bool Changed = false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + + while (1) { + Allocas.clear(); + + // Find allocas that are safe to promote, by looking at all instructions in + // the entry node + for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? + if (isAllocaPromotable(AI)) + Allocas.push_back(AI); + + if (Allocas.empty()) break; + + PromoteMemToReg(Allocas, DT); + NumPromoted += Allocas.size(); + Changed = true; + } + + return Changed; +} + +// createPromoteMemoryToRegister - Provide an entry point to create this pass. +// +FunctionPass *llvm::createPromoteMemoryToRegisterPass() { + return new PromotePass(); +}
diff --git a/src/LLVM/lib/Transforms/Utils/PACKAGE.vcxproj b/src/LLVM/lib/Transforms/Utils/PACKAGE.vcxproj new file mode 100644 index 0000000..42be7ab --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/PACKAGE.vcxproj
@@ -0,0 +1,277 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="MinSizeRel|Win32"> + <Configuration>MinSizeRel</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="RelWithDebInfo|Win32"> + <Configuration>RelWithDebInfo</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGUID>{1B050569-3318-48D9-8BB0-4DE9EF58B202}</ProjectGUID> + <Keyword>Win32Proj</Keyword> + <Platform>Win32</Platform> + <ProjectName>PACKAGE</ProjectName> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'" Label="Configuration"> + <ConfigurationType></ConfigurationType> + <UseOfMfc>false</UseOfMfc> + 
<CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.20506.1</_ProjectFileVersion> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + <IntDir Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">$(Platform)\$(Configuration)\$(ProjectName)\</IntDir> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + 
</PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + 
</ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> + <Midl> + <AdditionalIncludeDirectories>..\Utils;..\..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <OutputDirectory>$(IntDir)</OutputDirectory> + <HeaderFileName>%(Filename).h</HeaderFileName> + <TypeLibraryName>%(Filename).tlb</TypeLibraryName> + <InterfaceIdentifierFileName>%(Filename)_i.c</InterfaceIdentifierFileName> + <ProxyFileName>%(Filename)_p.c</ProxyFileName> + </Midl> + <PostBuildEvent> + <Message></Message> + <Command>setlocal +cd ..\..\..\..\LLVM +if %errorlevel% neq 0 goto :cmEnd +D: +if %errorlevel% neq 0 goto :cmEnd +"C:\Program Files (x86)\CMake 2.8\bin\cpack.exe" -C $(Configuration) --config ./CPackConfig.cmake +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + </PostBuildEvent> + </ItemDefinitionGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +cd . +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\PACKAGE_force</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'"> </Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +cd . 
+if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles/PACKAGE_force.rule;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\PACKAGE_force</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeLists.txt"> + <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. --check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='MinSizeRel|Win32'">CMakeFiles\generate.stamp</Outputs> + <Message Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">Building Custom Rule CMakeLists.txt</Message> + <Command Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">setlocal +"C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -H../../.. -B../../.. 
--check-stamp-file CMakeFiles\generate.stamp +if %errorlevel% neq 0 goto :cmEnd +:cmEnd +endlocal & call :cmErrorLevel %errorlevel% & goto :cmDone +:cmErrorLevel +exit /b %1 +:cmDone +if %errorlevel% neq 0 goto :VCEnd</Command> + <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeLists.txt;CMakeLists.txt;CMakeLists.txt;%(AdditionalInputs)</AdditionalInputs> + <Outputs Condition="'$(Configuration)|$(Platform)'=='RelWithDebInfo|Win32'">CMakeFiles\generate.stamp</Outputs> + </CustomBuild> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\..\..\ALL_BUILD.vcxproj"> + <Project>17AECBCF-B2AE-4524-9010-9A175A8F6BFE</Project> + </ProjectReference> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file
diff --git a/src/LLVM/lib/Transforms/Utils/PACKAGE.vcxproj.filters b/src/LLVM/lib/Transforms/Utils/PACKAGE.vcxproj.filters new file mode 100644 index 0000000..a570359 --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/PACKAGE.vcxproj.filters
@@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="CMakeFiles\PACKAGE_force.rule"> + <Filter>CMake Rules</Filter> + </CustomBuild> + <CustomBuild Include="CMakeLists.txt" /> + </ItemGroup> + <ItemGroup> + <Filter Include="CMake Rules"> + <UniqueIdentifier>{71794486-B3CB-4A48-93CC-DE95557E96E1}</UniqueIdentifier> + </Filter> + </ItemGroup> + <ItemGroup> + </ItemGroup> +</Project>
diff --git a/src/LLVM/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/src/LLVM/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index f2f0612..db3e942 100644 --- a/src/LLVM/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/src/LLVM/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -9,10 +9,19 @@ // // This file promotes memory references to be register references. It promotes // alloca instructions which only have loads and stores as uses. An alloca is -// transformed by using dominator frontiers to place PHI nodes, then traversing -// the function in depth-first order to rewrite loads and stores as appropriate. -// This is just the standard SSA construction algorithm to construct "pruned" -// SSA form. +// transformed by using iterated dominator frontiers to place PHI nodes, then +// traversing the function in depth-first order to rewrite loads and stores as +// appropriate. +// +// The algorithm used here is based on: +// +// Sreedhar and Gao. A linear time algorithm for placing phi-nodes. +// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of +// Programming Languages +// POPL '95. ACM, New York, NY, 62-73. +// +// It has been modified to not explicitly use the DJ graph data structure and to +// directly compute pruned SSA using per-variable liveness information. 
// //===----------------------------------------------------------------------===// @@ -24,9 +33,13 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -34,6 +47,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" #include <algorithm> +#include <queue> using namespace llvm; STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); @@ -72,13 +86,33 @@ UI != UE; ++UI) { // Loop over all of the uses of the alloca const User *U = *UI; if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. if (LI->isVolatile()) return false; } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == AI) return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. 
if (SI->isVolatile()) return false; + } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; + } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!onlyUsedByLifetimeMarkers(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; } else { return false; } @@ -166,8 +200,7 @@ /// std::vector<AllocaInst*> Allocas; DominatorTree &DT; - DominanceFrontier &DF; - DIFactory *DIF; + DIBuilder *DIB; /// AST - An AliasSetTracker object to update. If null, don't update it. /// @@ -175,7 +208,7 @@ /// AllocaLookup - Reverse mapping of Allocas. /// - std::map<AllocaInst*, unsigned> AllocaLookup; + DenseMap<AllocaInst*, unsigned> AllocaLookup; /// NewPhiNodes - The PhiNodes we're adding. /// @@ -191,6 +224,11 @@ /// std::vector<Value*> PointerAllocaValues; + /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare + /// intrinsic that describes it, if any, so that we can convert it to a + /// dbg.value intrinsic if the alloca gets promoted. + SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares; + /// Visited - The set of basic blocks the renamer has already visited. /// SmallPtrSet<BasicBlock*, 16> Visited; @@ -199,24 +237,21 @@ /// non-determinstic behavior. DenseMap<BasicBlock*, unsigned> BBNumbers; + /// DomLevels - Maps DomTreeNodes to their level in the dominator tree. + DenseMap<DomTreeNode*, unsigned> DomLevels; + /// BBNumPreds - Lazily compute the number of predecessors a block has. 
DenseMap<const BasicBlock*, unsigned> BBNumPreds; public: PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, - DominanceFrontier &df, AliasSetTracker *ast) - : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {} + AliasSetTracker *ast) + : Allocas(A), DT(dt), DIB(0), AST(ast) {} ~PromoteMem2Reg() { - delete DIF; + delete DIB; } void run(); - /// properlyDominates - Return true if I1 properly dominates I2. - /// - bool properlyDominates(Instruction *I1, Instruction *I2) const { - return DT.properlyDominates(I1->getParent(), I2->getParent()); - } - /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. /// bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { @@ -247,23 +282,23 @@ LargeBlockInfo &LBI); void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI); - + void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, std::vector<RenamePassData> &Worklist); - bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version, - SmallPtrSet<PHINode*, 16> &InsertedPHINodes); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); }; struct AllocaInfo { - std::vector<BasicBlock*> DefiningBlocks; - std::vector<BasicBlock*> UsingBlocks; + SmallVector<BasicBlock*, 32> DefiningBlocks; + SmallVector<BasicBlock*, 32> UsingBlocks; StoreInst *OnlyStore; BasicBlock *OnlyBlock; bool OnlyUsedInOneBlock; Value *AllocaPointerVal; + DbgDeclareInst *DbgDeclare; void clear() { DefiningBlocks.clear(); @@ -272,6 +307,7 @@ OnlyBlock = 0; OnlyUsedInOneBlock = true; AllocaPointerVal = 0; + DbgDeclare = 0; } /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our @@ -306,15 +342,51 @@ OnlyUsedInOneBlock = false; } } + + DbgDeclare = FindAllocaDbgDeclare(AI); + } + }; + + typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair; + + struct DomTreeNodeCompare { + bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { + return LHS.second < 
RHS.second; } }; } // end of anonymous namespace +static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. + + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE;) { + Instruction *I = cast<Instruction>(*UI); + ++UI; + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + continue; + + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE;) { + Instruction *Inst = cast<Instruction>(*UI); + ++UI; + Inst->eraseFromParent(); + } + } + I->eraseFromParent(); + } +} void PromoteMem2Reg::run() { - Function &F = *DF.getRoot()->getParent(); + Function &F = *DT.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); + AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; @@ -327,6 +399,8 @@ assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); + removeLifetimeIntrinsicUsers(AI); + if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. if (AST) AST->deleteValue(AI); @@ -349,6 +423,13 @@ // Finally, after the scan, check to see if the store is all that is left. if (Info.UsingBlocks.empty()) { + // Record debuginfo for the store and remove the declaration's debuginfo. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); + DDI->eraseFromParent(); + } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); @@ -377,6 +458,12 @@ // Remove the (now dead) stores and alloca. 
while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->use_back()); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } SI->eraseFromParent(); LBI.deleteValue(SI); } @@ -387,12 +474,35 @@ // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); + + // The alloca's debuginfo can be removed as well. + if (DbgDeclareInst *DDI = Info.DbgDeclare) + DDI->eraseFromParent(); ++NumLocalPromoted; continue; } } - + + // If we haven't computed dominator tree levels, do so now. + if (DomLevels.empty()) { + SmallVector<DomTreeNode*, 32> Worklist; + + DomTreeNode *Root = DT.getRootNode(); + DomLevels[Root] = 0; + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + unsigned ChildLevel = DomLevels[Node] + 1; + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); + CI != CE; ++CI) { + DomLevels[*CI] = ChildLevel; + Worklist.push_back(*CI); + } + } + } + // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { @@ -405,6 +515,9 @@ // stored into the alloca. if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; + + // Remember the dbg.declare intrinsic describing this alloca, if any. + if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -451,15 +564,19 @@ Instruction *A = Allocas[i]; // If there are any uses of the alloca instructions left, they must be in - // sections of dead code that were not processed on the dominance frontier. - // Just delete the users now. - // + // unreachable basic blocks that were not processed by walking the dominator + // tree. Just delete the users now. 
if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); if (AST) AST->deleteValue(A); A->eraseFromParent(); } + // Remove alloca's dbg.declare instrinsics from the function. + for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) + if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) + DDI->eraseFromParent(); + // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is @@ -471,9 +588,9 @@ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; - + // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = PN->hasConstantValue(&DT)) { + if (Value *V = SimplifyInstruction(PN, 0, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); @@ -625,7 +742,6 @@ /// avoiding insertion of dead phi nodes. void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, AllocaInfo &Info) { - // Unique the set of defining blocks for efficient lookup. SmallPtrSet<BasicBlock*, 32> DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); @@ -635,47 +751,78 @@ SmallPtrSet<BasicBlock*, 32> LiveInBlocks; ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); - // Compute the locations where PhiNodes need to be inserted. Look at the - // dominance frontier of EACH basic-block we have a write in. - unsigned CurrentVersion = 0; - SmallPtrSet<PHINode*, 16> InsertedPHINodes; - std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks; - while (!Info.DefiningBlocks.empty()) { - BasicBlock *BB = Info.DefiningBlocks.back(); - Info.DefiningBlocks.pop_back(); - - // Look up the DF for this write, add it to defining blocks. 
- DominanceFrontier::const_iterator it = DF.find(BB); - if (it == DF.end()) continue; - - const DominanceFrontier::DomSetType &S = it->second; - - // In theory we don't need the indirection through the DFBlocks vector. - // In practice, the order of calling QueuePhiNode would depend on the - // (unspecified) ordering of basic blocks in the dominance frontier, - // which would give PHI nodes non-determinstic subscripts. Fix this by - // processing blocks in order of the occurance in the function. - for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), - PE = S.end(); P != PE; ++P) { - // If the frontier block is not in the live-in set for the alloca, don't - // bother processing it. - if (!LiveInBlocks.count(*P)) - continue; - - DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); - } - - // Sort by which the block ordering in the function. - if (DFBlocks.size() > 1) - std::sort(DFBlocks.begin(), DFBlocks.end()); - - for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { - BasicBlock *BB = DFBlocks[i].second; - if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes)) - Info.DefiningBlocks.push_back(BB); - } - DFBlocks.clear(); + // Use a priority queue keyed on dominator tree level so that inserted nodes + // are handled from the bottom of the dominator tree upwards. 
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, + DomTreeNodeCompare> IDFPriorityQueue; + IDFPriorityQueue PQ; + + for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(), + E = DefBlocks.end(); I != E; ++I) { + if (DomTreeNode *Node = DT.getNode(*I)) + PQ.push(std::make_pair(Node, DomLevels[Node])); } + + SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks; + SmallPtrSet<DomTreeNode*, 32> Visited; + SmallVector<DomTreeNode*, 32> Worklist; + while (!PQ.empty()) { + DomTreeNodePair RootPair = PQ.top(); + PQ.pop(); + DomTreeNode *Root = RootPair.first; + unsigned RootLevel = RootPair.second; + + // Walk all dominator tree children of Root, inspecting their CFG edges with + // targets elsewhere on the dominator tree. Only targets whose level is at + // most Root's level are added to the iterated dominance frontier of the + // definition set. + + Worklist.clear(); + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + BasicBlock *BB = Node->getBlock(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; + ++SI) { + DomTreeNode *SuccNode = DT.getNode(*SI); + + // Quickly skip all CFG edges that are also dominator tree edges instead + // of catching them below. 
+ if (SuccNode->getIDom() == Node) + continue; + + unsigned SuccLevel = DomLevels[SuccNode]; + if (SuccLevel > RootLevel) + continue; + + if (!Visited.insert(SuccNode)) + continue; + + BasicBlock *SuccBB = SuccNode->getBlock(); + if (!LiveInBlocks.count(SuccBB)) + continue; + + DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); + if (!DefBlocks.count(SuccBB)) + PQ.push(std::make_pair(SuccNode, SuccLevel)); + } + + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; + ++CI) { + if (!Visited.count(*CI)) + Worklist.push_back(*CI); + } + } + } + + if (DFBlocks.size() > 1) + std::sort(DFBlocks.begin(), DFBlocks.end()); + + unsigned CurrentVersion = 0; + for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) + QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); } /// RewriteSingleStoreAlloca - If there is only a single store to this value, @@ -840,8 +987,7 @@ // Alloca returns true if there wasn't already a phi-node for that variable // bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, - unsigned &Version, - SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { + unsigned &Version) { // Look up the basic-block in question. PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; @@ -850,13 +996,11 @@ // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. - PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), + Allocas[AllocaNo]->getName() + "." 
+ Twine(Version++), BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; - PN->reserveOperandSpace(getNumPreds(BB)); - - InsertedPHINodes.insert(PN); if (AST && PN->getType()->isPointerTy()) AST->copyValue(PointerAllocaValues[AllocaNo], PN); @@ -925,7 +1069,7 @@ AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); if (!Src) continue; - std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); + DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); if (AI == AllocaLookup.end()) continue; Value *V = IncomingVals[AI->second]; @@ -941,13 +1085,18 @@ AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); if (!Dest) continue; - std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); + DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); if (ai == AllocaLookup.end()) continue; // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); - + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } BB->getInstList().erase(SI); } } @@ -973,18 +1122,17 @@ } /// PromoteMemToReg - Promote the specified list of alloca instructions into -/// scalar registers, inserting PHI nodes as appropriate. This function makes -/// use of DominanceFrontier information. This function does not modify the CFG -/// of the function at all. All allocas must be from the same function. +/// scalar registers, inserting PHI nodes as appropriate. This function does +/// not modify the CFG of the function at all. All allocas must be from the +/// same function. /// /// If AST is specified, the specified tracker is updated to reflect changes /// made to the IR. 
/// void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, - DominatorTree &DT, DominanceFrontier &DF, - AliasSetTracker *AST) { + DominatorTree &DT, AliasSetTracker *AST) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, DF, AST).run(); + PromoteMem2Reg(Allocas, DT, AST).run(); }
diff --git a/src/LLVM/lib/Transforms/Utils/SSAUpdater.cpp b/src/LLVM/lib/Transforms/Utils/SSAUpdater.cpp index a48234f..fa8061c 100644 --- a/src/LLVM/lib/Transforms/Utils/SSAUpdater.cpp +++ b/src/LLVM/lib/Transforms/Utils/SSAUpdater.cpp
@@ -12,15 +12,22 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" +#include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" + using namespace llvm; typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; @@ -29,20 +36,21 @@ } SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {} + : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete &getAvailableVals(AV); } /// Initialize - Reset this object to get ready for a new set of SSA -/// updates. ProtoValue is the value used to name PHI nodes. -void SSAUpdater::Initialize(Value *ProtoValue) { +/// updates with type 'Ty'. PHI nodes get a name based on 'Name'. +void SSAUpdater::Initialize(Type *Ty, StringRef Name) { if (AV == 0) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - PrototypeValue = ProtoValue; + ProtoType = Ty; + ProtoName = Name; } /// HasValueForBlock - Return true if the SSAUpdater already has a value for @@ -54,8 +62,8 @@ /// AddAvailableValue - Indicate that a rewritten value is available in the /// specified block with the specified value. 
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { - assert(PrototypeValue != 0 && "Need to initialize SSAUpdater"); - assert(PrototypeValue->getType() == V->getType() && + assert(ProtoType != 0 && "Need to initialize SSAUpdater"); + assert(ProtoType == V->getType() && "All rewritten values must have the same type"); getAvailableVals(AV)[BB] = V; } @@ -148,7 +156,7 @@ // If there are no predecessors, just return undef. if (PredValues.empty()) - return UndefValue::get(PrototypeValue->getType()); + return UndefValue::get(ProtoType); // Otherwise, if all the merged values are the same, just use it. if (SingularValue != 0) @@ -168,9 +176,8 @@ } // Ok, we have no way out, insert a new one now. - PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), - &BB->front()); - InsertedPHI->reserveOperandSpace(PredValues.size()); + PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), + ProtoName, &BB->front()); // Fill in all the predecessors of the PHI. for (unsigned i = 0, e = PredValues.size(); i != e; ++i) @@ -178,11 +185,14 @@ // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. - if (Value *ConstVal = InsertedPHI->hasConstantValue()) { + if (Value *V = SimplifyInstruction(InsertedPHI)) { InsertedPHI->eraseFromParent(); - return ConstVal; + return V; } + // Set DebugLoc. + InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB)); + // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); @@ -204,6 +214,22 @@ U.set(V); } +/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However, +/// this version of the method can rewrite uses in the same block as a +/// definition, because it assumes that all uses of a value are below any +/// inserted values. 
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueAtEndOfBlock(User->getParent()); + + U.set(V); +} + /// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator /// in the SSAUpdaterImpl template. namespace { @@ -265,16 +291,15 @@ /// GetUndefVal - Get an undefined value of the same type as the value /// being handled. static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { - return UndefValue::get(Updater->PrototypeValue->getType()); + return UndefValue::get(Updater->ProtoType); } /// CreateEmptyPHI - Create a new PHI instruction in the specified block. /// Reserve space for the operands but do not fill them in yet. static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, SSAUpdater *Updater) { - PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(), - &BB->front()); - PHI->reserveOperandSpace(NumPreds); + PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, + Updater->ProtoName, &BB->front()); return PHI; } @@ -326,3 +351,170 @@ SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); return Impl.GetValue(BB); } + +//===----------------------------------------------------------------------===// +// LoadAndStorePromoter Implementation +//===----------------------------------------------------------------------===// + +LoadAndStorePromoter:: +LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts, + SSAUpdater &S, StringRef BaseName) : SSA(S) { + if (Insts.empty()) return; + + Value *SomeVal; + if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0])) + SomeVal = LI; + else + SomeVal = cast<StoreInst>(Insts[0])->getOperand(0); + + if (BaseName.empty()) + BaseName = SomeVal->getName(); + SSA.Initialize(SomeVal->getType(), BaseName); +} + + +void LoadAndStorePromoter:: +run(const SmallVectorImpl<Instruction*> 
&Insts) const { + + // First step: bucket up uses of the alloca by the block they occur in. + // This is important because we have to handle multiple defs/uses in a block + // ourselves: SSAUpdater is purely for cross-block references. + DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock; + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) { + Instruction *User = Insts[i]; + UsesByBlock[User->getParent()].push_back(User); + } + + // Okay, now we can iterate over all the blocks in the function with uses, + // processing them. Keep track of which loads are loading a live-in value. + // Walk the uses in the use-list order to be determinstic. + SmallVector<LoadInst*, 32> LiveInLoads; + DenseMap<Value*, Value*> ReplacedLoads; + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) { + Instruction *User = Insts[i]; + BasicBlock *BB = User->getParent(); + TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; + + // If this block has already been processed, ignore this repeat use. + if (BlockUses.empty()) continue; + + // Okay, this is the first use in the block. If this block just has a + // single user in it, we can rewrite it trivially. + if (BlockUses.size() == 1) { + // If it is a store, it is a trivial def of the value in the block. + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + updateDebugInfo(SI); + SSA.AddAvailableValue(BB, SI->getOperand(0)); + } else + // Otherwise it is a load, queue it to rewrite as a live-in load. + LiveInLoads.push_back(cast<LoadInst>(User)); + BlockUses.clear(); + continue; + } + + // Otherwise, check to see if this block is all loads. + bool HasStore = false; + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { + if (isa<StoreInst>(BlockUses[i])) { + HasStore = true; + break; + } + } + + // If so, we can queue them all as live in loads. We don't have an + // efficient way to tell which on is first in the block and don't want to + // scan large blocks, so just add all loads as live ins. 
+ if (!HasStore) { + for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) + LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); + BlockUses.clear(); + continue; + } + + // Otherwise, we have mixed loads and stores (or just a bunch of stores). + // Since SSAUpdater is purely for cross-block values, we need to determine + // the order of these instructions in the block. If the first use in the + // block is a load, then it uses the live in value. The last store defines + // the live out value. We handle this by doing a linear scan of the block. + Value *StoredValue = 0; + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + if (LoadInst *L = dyn_cast<LoadInst>(II)) { + // If this is a load from an unrelated pointer, ignore it. + if (!isInstInList(L, Insts)) continue; + + // If we haven't seen a store yet, this is a live in use, otherwise + // use the stored value. + if (StoredValue) { + replaceLoadWithValue(L, StoredValue); + L->replaceAllUsesWith(StoredValue); + ReplacedLoads[L] = StoredValue; + } else { + LiveInLoads.push_back(L); + } + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(II)) { + // If this is a store to an unrelated pointer, ignore it. + if (!isInstInList(SI, Insts)) continue; + updateDebugInfo(SI); + + // Remember that this is the active value in the block. + StoredValue = SI->getOperand(0); + } + } + + // The last stored value that happened is the live-out for the block. + assert(StoredValue && "Already checked that there is a store in block"); + SSA.AddAvailableValue(BB, StoredValue); + BlockUses.clear(); + } + + // Okay, now we rewrite all loads that use live-in values in the loop, + // inserting PHI nodes as necessary. + for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { + LoadInst *ALoad = LiveInLoads[i]; + Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + replaceLoadWithValue(ALoad, NewVal); + + // Avoid assertions in unreachable code. 
+ if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType()); + ALoad->replaceAllUsesWith(NewVal); + ReplacedLoads[ALoad] = NewVal; + } + + // Allow the client to do stuff before we start nuking things. + doExtraRewritesBeforeFinalDeletion(); + + // Now that everything is rewritten, delete the old instructions from the + // function. They should all be dead now. + for (unsigned i = 0, e = Insts.size(); i != e; ++i) { + Instruction *User = Insts[i]; + + // If this is a load that still has uses, then the load must have been added + // as a live value in the SSAUpdate data structure for a block (e.g. because + // the loaded value was stored later). In this case, we need to recursively + // propagate the updates until we get to the real value. + if (!User->use_empty()) { + Value *NewVal = ReplacedLoads[User]; + assert(NewVal && "not a replaced load?"); + + // Propagate down to the ultimate replacee. The intermediately loads + // could theoretically already have been deleted, so we don't want to + // dereference the Value*'s. + DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); + while (RLI != ReplacedLoads.end()) { + NewVal = RLI->second; + RLI = ReplacedLoads.find(NewVal); + } + + replaceLoadWithValue(cast<LoadInst>(User), NewVal); + User->replaceAllUsesWith(NewVal); + } + + instructionDeleted(User); + User->eraseFromParent(); + } +}
diff --git a/src/LLVM/lib/Transforms/Utils/SimplifyCFG.cpp b/src/LLVM/lib/Transforms/Utils/SimplifyCFG.cpp index 1177d96..b8c3ab4 100644 --- a/src/LLVM/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/src/LLVM/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -19,39 +19,58 @@ #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/NoFolder.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> -#include <functional> #include <set> #include <map> using namespace llvm; +static cl::opt<unsigned> +PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), + cl::desc("Control the amount of phi node folding to perform (default = 1)")); + +static cl::opt<bool> +DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), + cl::desc("Duplicate return instructions into unconditional branches")); + STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { class SimplifyCFGOpt { const TargetData *const TD; - ConstantInt *GetConstantInt(Value *V); - Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values); - Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values); - bool GatherValueComparisons(Instruction *Cond, Value *&CompVal, - std::vector<ConstantInt*> &Values); Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases); bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, - BasicBlock *Pred); - 
bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI); + BasicBlock *Pred, + IRBuilder<> &Builder); + bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI, + IRBuilder<> &Builder); + + bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); + bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder); + bool SimplifyUnreachable(UnreachableInst *UI); + bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); + bool SimplifyIndirectBr(IndirectBrInst *IBI); + bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder); + bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {} @@ -91,8 +110,6 @@ /// ExistPred, an existing predecessor of Succ. static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { - assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) != - succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!"); if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do PHINode *PN; @@ -102,28 +119,29 @@ } -/// GetIfCondition - Given a basic block (BB) with two predecessors (and -/// presumably PHI nodes in it), check to see if the merge at this block is due +/// GetIfCondition - Given a basic block (BB) with two predecessors (and at +/// least one PHI node in it), check to see if the merge at this block is due /// to an "if condition". If so, return the boolean condition that determines /// which entry into BB will be taken. Also, return by references the block /// that will be entered from if the condition is true, and the block that will /// be entered if the condition is false. 
/// -/// -static Value *GetIfCondition(BasicBlock *BB, - BasicBlock *&IfTrue, BasicBlock *&IfFalse) { - assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 && +/// This does no checking to see if the true/false blocks have large or unsavory +/// instructions in them. +static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse) { + PHINode *SomePHI = cast<PHINode>(BB->begin()); + assert(SomePHI->getNumIncomingValues() == 2 && "Function can only handle blocks with 2 predecessors!"); - BasicBlock *Pred1 = *pred_begin(BB); - BasicBlock *Pred2 = *++pred_begin(BB); + BasicBlock *Pred1 = SomePHI->getIncomingBlock(0); + BasicBlock *Pred2 = SomePHI->getIncomingBlock(1); // We can only handle branches. Other control flow will be lowered to // branches if possible anyway. - if (!isa<BranchInst>(Pred1->getTerminator()) || - !isa<BranchInst>(Pred2->getTerminator())) + BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); + if (Pred1Br == 0 || Pred2Br == 0) return 0; - BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator()); - BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator()); // Eliminate code duplication by ensuring that Pred1Br is conditional if // either are. @@ -140,6 +158,12 @@ } if (Pred1Br->isConditional()) { + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (Pred2->getSinglePredecessor() == 0) + return 0; + // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && @@ -156,39 +180,29 @@ return 0; } - // The only thing we have to watch out for here is to make sure that Pred2 - // doesn't have incoming edges from other blocks. If it does, the condition - // doesn't dominate BB. 
- if (++pred_begin(Pred2) != pred_end(Pred2)) - return 0; - return Pred1Br->getCondition(); } // Ok, if we got here, both predecessors end with an unconditional branch to // BB. Don't panic! If both blocks only have a single (identical) // predecessor, and THAT is a conditional branch, then we're all ok! - if (pred_begin(Pred1) == pred_end(Pred1) || - ++pred_begin(Pred1) != pred_end(Pred1) || - pred_begin(Pred2) == pred_end(Pred2) || - ++pred_begin(Pred2) != pred_end(Pred2) || - *pred_begin(Pred1) != *pred_begin(Pred2)) + BasicBlock *CommonPred = Pred1->getSinglePredecessor(); + if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) return 0; // Otherwise, if this is a conditional branch, then we can use it! - BasicBlock *CommonPred = *pred_begin(Pred1); - if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) { - assert(BI->isConditional() && "Two successors but not conditional?"); - if (BI->getSuccessor(0) == Pred1) { - IfTrue = Pred1; - IfFalse = Pred2; - } else { - IfTrue = Pred2; - IfFalse = Pred1; - } - return BI->getCondition(); + BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); + if (BI == 0) return 0; + + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; } - return 0; + return BI->getCondition(); } /// DominatesMergePoint - If we have a merge point of an "if condition" as @@ -197,11 +211,20 @@ /// which works well enough for us. /// /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to -/// see if V (which must be an instruction) is cheap to compute and is -/// non-trapping. If both are true, the instruction is inserted into the set -/// and true is returned. +/// see if V (which must be an instruction) and its recursive operands +/// that do not dominate BB have a combined cost lower than CostRemaining and +/// are non-trapping. 
If both are true, the instruction is inserted into the +/// set and true is returned. +/// +/// The cost for most non-trapping instructions is defined as 1 except for +/// Select whose cost is 2. +/// +/// After this function returns, CostRemaining is decreased by the cost of +/// V plus its non-dominating operands. If that cost is greater than +/// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, - std::set<Instruction*> *AggressiveInsts) { + SmallPtrSet<Instruction*, 4> *AggressiveInsts, + unsigned &CostRemaining) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -219,54 +242,80 @@ // If this instruction is defined in a block that contains an unconditional // branch to BB, then it must be in the 'conditional' part of the "if - // statement". - if (BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator())) - if (BI->isUnconditional() && BI->getSuccessor(0) == BB) { - if (!AggressiveInsts) return false; - // Okay, it looks like the instruction IS in the "condition". Check to - // see if it's a cheap instruction to unconditionally compute, and if it - // only uses stuff defined outside of the condition. If so, hoist it out. - if (!I->isSafeToSpeculativelyExecute()) - return false; + // statement". If not, it definitely dominates the region. + BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()); + if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB) + return true; - switch (I->getOpcode()) { - default: return false; // Cannot hoist this out safely. - case Instruction::Load: { - // We have to check to make sure there are no instructions before the - // load in its basic block, as we are going to hoist the loop out to - // its predecessor. 
- BasicBlock::iterator IP = PBB->begin(); - if (IP != BasicBlock::iterator(I)) - return false; - break; - } - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::ICmp: - break; // These are all cheap and non-trapping instructions. - } + // If we aren't allowing aggressive promotion anymore, then don't consider + // instructions in the 'if region'. + if (AggressiveInsts == 0) return false; + + // If we have seen this instruction before, don't count it again. + if (AggressiveInsts->count(I)) return true; - // Okay, we can only really hoist these out if their operands are not - // defined in the conditional region. - for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, 0)) - return false; - // Okay, it's safe to do this! Remember this instruction. - AggressiveInsts->insert(I); - } + // Okay, it looks like the instruction IS in the "condition". Check to + // see if it's a cheap instruction to unconditionally compute, and if it + // only uses stuff defined outside of the condition. If so, hoist it out. + if (!I->isSafeToSpeculativelyExecute()) + return false; + unsigned Cost = 0; + + switch (I->getOpcode()) { + default: return false; // Cannot hoist this out safely. + case Instruction::Load: + // We have to check to make sure there are no instructions before the + // load in its basic block, as we are going to hoist the load out to its + // predecessor. + if (PBB->getFirstNonPHIOrDbg() != I) + return false; + Cost = 1; + break; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. 
+ if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices()) + return false; + Cost = 1; + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::ICmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + Cost = 1; + break; // These are all cheap and non-trapping instructions. + + case Instruction::Select: + Cost = 2; + break; + } + + if (Cost > CostRemaining) + return false; + + CostRemaining -= Cost; + + // Okay, we can only really hoist these out if their operands do + // not take us over the cost threshold. + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) + return false; + // Okay, it's safe to do this! Remember this instruction. + AggressiveInsts->insert(I); return true; } /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) { +static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) @@ -274,7 +323,7 @@ // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - const IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). 
if (isa<ConstantPointerNull>(V)) @@ -294,77 +343,94 @@ return 0; } -/// GatherConstantSetEQs - Given a potentially 'or'd together collection of -/// icmp_eq instructions that compare a value against a constant, return the -/// value being compared, and stick the constant into the Values vector. -Value *SimplifyCFGOpt:: -GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values) { - if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Inst->getOpcode() == Instruction::ICmp && - cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) { - if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) { - Values.push_back(C); - return Inst->getOperand(0); - } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) { - Values.push_back(C); - return Inst->getOperand(1); +/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together +/// collection of icmp eq/ne instructions that compare a value against a +/// constant, return the value being compared, and stick the constant into the +/// Values vector. +static Value * +GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, + const TargetData *TD, bool isEQ, unsigned &UsedICmps) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return 0; + + // If this is an icmp against a constant, handle this as one of the cases. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { + if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { + if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) { + UsedICmps++; + Vals.push_back(C); + return I->getOperand(0); } - } else if (Inst->getOpcode() == Instruction::Or) { - if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values)) - if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values)) - if (LHS == RHS) - return LHS; + + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to + // the set. 
+ ConstantRange Span = + ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); + + // If this is an and/!= check then we want to optimize "x ugt 2" into + // x != 0 && x != 1. + if (!isEQ) + Span = Span.inverse(); + + // If there are a ton of values, we don't want to make a ginormous switch. + if (Span.getSetSize().ugt(8) || Span.isEmptySet() || + // We don't handle wrapped sets yet. + Span.isWrappedSet()) + return 0; + + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) + Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); + UsedICmps++; + return I->getOperand(0); } + return 0; } + + // Otherwise, we can only handle an | or &, depending on isEQ. + if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) + return 0; + + unsigned NumValsBeforeLHS = Vals.size(); + unsigned UsedICmpsBeforeLHS = UsedICmps; + if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, + isEQ, UsedICmps)) { + unsigned NumVals = Vals.size(); + unsigned UsedICmpsBeforeRHS = UsedICmps; + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + isEQ, UsedICmps)) { + if (LHS == RHS) + return LHS; + Vals.resize(NumVals); + UsedICmps = UsedICmpsBeforeRHS; + } + + // The RHS of the or/and can't be folded in and we haven't used "Extra" yet, + // set it and return success. + if (Extra == 0 || Extra == I->getOperand(1)) { + Extra = I->getOperand(1); + return LHS; + } + + Vals.resize(NumValsBeforeLHS); + UsedICmps = UsedICmpsBeforeLHS; + return 0; + } + + // If the LHS can't be folded in, but Extra is available and RHS can, try to + // use LHS as Extra. 
+ if (Extra == 0 || Extra == I->getOperand(0)) { + Value *OldExtra = Extra; + Extra = I->getOperand(0); + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + isEQ, UsedICmps)) + return RHS; + assert(Vals.size() == NumValsBeforeLHS); + Extra = OldExtra; + } + return 0; } - -/// GatherConstantSetNEs - Given a potentially 'and'd together collection of -/// setne instructions that compare a value against a constant, return the value -/// being compared, and stick the constant into the Values vector. -Value *SimplifyCFGOpt:: -GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values) { - if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Inst->getOpcode() == Instruction::ICmp && - cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) { - if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) { - Values.push_back(C); - return Inst->getOperand(0); - } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) { - Values.push_back(C); - return Inst->getOperand(1); - } - } else if (Inst->getOpcode() == Instruction::And) { - if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values)) - if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values)) - if (LHS == RHS) - return LHS; - } - } - return 0; -} - -/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a -/// bunch of comparisons of one value against constants, return the value and -/// the constants being compared. -bool SimplifyCFGOpt::GatherValueComparisons(Instruction *Cond, Value *&CompVal, - std::vector<ConstantInt*> &Values) { - if (Cond->getOpcode() == Instruction::Or) { - CompVal = GatherConstantSetEQs(Cond, Values); - - // Return true to indicate that the condition is true if the CompVal is - // equal to one of the constants. 
- return true; - } else if (Cond->getOpcode() == Instruction::And) { - CompVal = GatherConstantSetNEs(Cond, Values); - - // Return false to indicate that the condition is false if the CompVal is - // equal to one of the constants. - return false; - } - return false; -} - + static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { Instruction* Cond = 0; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { @@ -372,6 +438,8 @@ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) Cond = dyn_cast<Instruction>(BI->getCondition()); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) { + Cond = dyn_cast<Instruction>(IBI->getAddress()); } TI->eraseFromParent(); @@ -393,7 +461,7 @@ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || ICI->getPredicate() == ICmpInst::ICMP_NE) && - GetConstantInt(ICI->getOperand(1))) + GetConstantInt(ICI->getOperand(1), TD)) CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. @@ -418,7 +486,7 @@ BranchInst *BI = cast<BranchInst>(TI); ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); - Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1)), + Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD), BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE))); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); @@ -457,8 +525,8 @@ } // Otherwise, just sort both lists and compare element by element. - std::sort(V1->begin(), V1->end()); - std::sort(V2->begin(), V2->end()); + array_pod_sort(V1->begin(), V1->end()); + array_pod_sort(V2->begin(), V2->end()); unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); while (i1 != e1 && i2 != e2) { if ((*V1)[i1].first == (*V2)[i2].first) @@ -479,7 +547,8 @@ /// form of jump threading. 
bool SimplifyCFGOpt:: SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, - BasicBlock *Pred) { + BasicBlock *Pred, + IRBuilder<> &Builder) { Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); if (!PredVal) return false; // Not a value comparison in predecessor. @@ -504,90 +573,87 @@ // If we are here, we know that the value is none of those cases listed in // PredCases. If there are any cases in ThisCases that are in PredCases, we // can simplify TI. - if (ValuesOverlap(PredCases, ThisCases)) { - if (isa<BranchInst>(TI)) { - // Okay, one of the successors of this condbr is dead. Convert it to a - // uncond br. - assert(ThisCases.size() == 1 && "Branch can only have one case!"); - // Insert the new branch. - Instruction *NI = BranchInst::Create(ThisDef, TI); - (void) NI; + if (!ValuesOverlap(PredCases, ThisCases)) + return false; + + if (isa<BranchInst>(TI)) { + // Okay, one of the successors of this condbr is dead. Convert it to a + // uncond br. + assert(ThisCases.size() == 1 && "Branch can only have one case!"); + // Insert the new branch. + Instruction *NI = Builder.CreateBr(ThisDef); + (void) NI; - // Remove PHI node entries for the dead edge. - ThisCases[0].second->removePredecessor(TI->getParent()); + // Remove PHI node entries for the dead edge. + ThisCases[0].second->removePredecessor(TI->getParent()); - DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); - EraseTerminatorInstAndDCECond(TI); - return true; - - } else { - SwitchInst *SI = cast<SwitchInst>(TI); - // Okay, TI has cases that are statically dead, prune them away. 
- SmallPtrSet<Constant*, 16> DeadCases; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - DeadCases.insert(PredCases[i].first); - - DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI); - - for (unsigned i = SI->getNumCases()-1; i != 0; --i) - if (DeadCases.count(SI->getCaseValue(i))) { - SI->getSuccessor(i)->removePredecessor(TI->getParent()); - SI->removeCase(i); - } - - DEBUG(dbgs() << "Leaving: " << *TI << "\n"); - return true; - } + EraseTerminatorInstAndDCECond(TI); + return true; } - - } else { - // Otherwise, TI's block must correspond to some matched value. Find out - // which value (or set of values) this is. - ConstantInt *TIV = 0; - BasicBlock *TIBB = TI->getParent(); + + SwitchInst *SI = cast<SwitchInst>(TI); + // Okay, TI has cases that are statically dead, prune them away. + SmallPtrSet<Constant*, 16> DeadCases; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].second == TIBB) { - if (TIV == 0) - TIV = PredCases[i].first; - else - return false; // Cannot handle multiple values coming to this block. - } - assert(TIV && "No edge from pred to succ?"); - - // Okay, we found the one constant that our value can be if we get into TI's - // BB. Find out which successor will unconditionally be branched to. - BasicBlock *TheRealDest = 0; - for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) - if (ThisCases[i].first == TIV) { - TheRealDest = ThisCases[i].second; - break; - } - - // If not handled by any explicit cases, it is handled by the default case. - if (TheRealDest == 0) TheRealDest = ThisDef; - - // Remove PHI node entries for dead edges. - BasicBlock *CheckEdge = TheRealDest; - for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI) - if (*SI != CheckEdge) - (*SI)->removePredecessor(TIBB); - else - CheckEdge = 0; - - // Insert the new branch. 
- Instruction *NI = BranchInst::Create(TheRealDest, TI); - (void) NI; + DeadCases.insert(PredCases[i].first); DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + << "Through successor TI: " << *TI); - EraseTerminatorInstAndDCECond(TI); + for (unsigned i = SI->getNumCases()-1; i != 0; --i) + if (DeadCases.count(SI->getCaseValue(i))) { + SI->getSuccessor(i)->removePredecessor(TI->getParent()); + SI->removeCase(i); + } + + DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } - return false; + + // Otherwise, TI's block must correspond to some matched value. Find out + // which value (or set of values) this is. + ConstantInt *TIV = 0; + BasicBlock *TIBB = TI->getParent(); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].second == TIBB) { + if (TIV != 0) + return false; // Cannot handle multiple values coming to this block. + TIV = PredCases[i].first; + } + assert(TIV && "No edge from pred to succ?"); + + // Okay, we found the one constant that our value can be if we get into TI's + // BB. Find out which successor will unconditionally be branched to. + BasicBlock *TheRealDest = 0; + for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) + if (ThisCases[i].first == TIV) { + TheRealDest = ThisCases[i].second; + break; + } + + // If not handled by any explicit cases, it is handled by the default case. + if (TheRealDest == 0) TheRealDest = ThisDef; + + // Remove PHI node entries for dead edges. + BasicBlock *CheckEdge = TheRealDest; + for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI) + if (*SI != CheckEdge) + (*SI)->removePredecessor(TIBB); + else + CheckEdge = 0; + + // Insert the new branch. 
+ Instruction *NI = Builder.CreateBr(TheRealDest); + (void) NI; + + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + + EraseTerminatorInstAndDCECond(TI); + return true; } namespace { @@ -601,11 +667,22 @@ }; } +static int ConstantIntSortPredicate(const void *P1, const void *P2) { + const ConstantInt *LHS = *(const ConstantInt**)P1; + const ConstantInt *RHS = *(const ConstantInt**)P2; + if (LHS->getValue().ult(RHS->getValue())) + return 1; + if (LHS->getValue() == RHS->getValue()) + return 0; + return -1; +} + /// FoldValueComparisonIntoPredecessors - The specified terminator is a value /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons /// on the same value. If so, and if safe to do so, fold them together. -bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { +bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, + IRBuilder<> &Builder) { BasicBlock *BB = TI->getParent(); Value *CV = isValueEqualityComparison(TI); // CondVal assert(CV && "Not a comparison?"); @@ -698,16 +775,18 @@ for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without TargetData"); - CV = new PtrToIntInst(CV, TD->getIntPtrType(CV->getContext()), - PTI); + CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), + "magicptr"); } // Now that the successors are updated, create the new Switch instruction. 
- SwitchInst *NewSI = SwitchInst::Create(CV, PredDefault, - PredCases.size(), PTI); + SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, + PredCases.size()); + NewSI->setDebugLoc(PTI->getDebugLoc()); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) NewSI->addCase(PredCases[i].first, PredCases[i].second); @@ -723,7 +802,7 @@ // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) InfLoopBlock = BasicBlock::Create(BB->getContext(), - BB->getParent()); + "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); } NewSI->setSuccessor(i, InfLoopBlock); @@ -771,13 +850,17 @@ BasicBlock::iterator BB2_Itr = BB2->begin(); Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; - while (ISA_DEBUG_INFO_INTRINSIC(I1)) - I1 = BB1_Itr++; - while (ISA_DEBUG_INFO_INTRINSIC(I2)) - I2 = BB2_Itr++; - if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || - !I1->isIdenticalToWhenDefined(I2) || - (ISA_INVOKE_INST(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || + (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; // If we get here, we can hoist at least one instruction. 
@@ -796,22 +879,26 @@ if (!I2->use_empty()) I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); - BB2->getInstList().erase(I2); + I2->eraseFromParent(); I1 = BB1_Itr++; - while (ISA_DEBUG_INFO_INTRINSIC(I1)) - I1 = BB1_Itr++; I2 = BB2_Itr++; - while (ISA_DEBUG_INFO_INTRINSIC(I2)) - I2 = BB2_Itr++; - } while (I1->getOpcode() == I2->getOpcode() && - I1->isIdenticalToWhenDefined(I2)); + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + } while (I1->isIdenticalToWhenDefined(I2)); return true; HoistTerminator: // It may not be possible to hoist an invoke. - if (ISA_INVOKE_INST(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) + if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) return true; // Okay, it is safe to hoist the terminator. @@ -823,6 +910,7 @@ NT->takeName(I1); } + IRBuilder<true, NoFolder> Builder(NT); // Hoisting one of the terminators from our successor is a great thing. // Unfortunately, the successors of the if/else blocks may have PHI nodes in // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI @@ -834,18 +922,20 @@ (PN = dyn_cast<PHINode>(BBI)); ++BBI) { Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); - if (BB1V != BB2V) { - // These values do not agree. Insert a select instruction before NT - // that determines the right value. 
- SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) - SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V, - NT); - // Make the PHI node use the select for all incoming values for BB1/BB2 - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) - PN->setIncomingValue(i, SI); - } + if (BB1V == BB2V) continue; + + // These values do not agree. Insert a select instruction before NT + // that determines the right value. + SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; + if (SI == 0) + SI = cast<SelectInst> + (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, + BB1V->getName()+"."+BB2V->getName())); + + // Make the PHI node use the select for all incoming values for BB1/BB2 + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) + PN->setIncomingValue(i, SI); } } @@ -870,21 +960,19 @@ BBI != BBE; ++BBI) { Instruction *I = BBI; // Skip debug info. - if (ISA_DEBUG_INFO_INTRINSIC(I)) continue; - if (I == Term) break; + if (isa<DbgInfoIntrinsic>(I)) continue; + if (I == Term) break; - if (!HInst) - HInst = I; - else + if (HInst) return false; + HInst = I; } if (!HInst) return false; // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); - if (isa<Instruction>(BrCond) && - cast<Instruction>(BrCond)->getOpcode() == Instruction::FCmp) + if (isa<FCmpInst>(BrCond)) return false; // If BB1 is actually on the false edge of the conditional branch, remember @@ -978,7 +1066,7 @@ if (InsertPos != BIParent->begin()) --InsertPos; // Skip debug info between condition and branch. 
- while (InsertPos != BIParent->begin() && ISA_DEBUG_INFO_INTRINSIC(InsertPos)) + while (InsertPos != BIParent->begin() && isa<DbgInfoIntrinsic>(InsertPos)) --InsertPos; if (InsertPos == BrCond && !isa<PHINode>(BrCond)) { SmallPtrSet<Instruction *, 4> BB1Insns; @@ -988,12 +1076,12 @@ for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end(); UI != UE; ++UI) { Instruction *Use = cast<Instruction>(*UI); - if (BB1Insns.count(Use)) { - // If BrCond uses the instruction that place it just before - // branch instruction. - InsertPos = BI; - break; - } + if (!BB1Insns.count(Use)) continue; + + // If BrCond uses the instruction that place it just before + // branch instruction. + InsertPos = BI; + break; } } else InsertPos = BI; @@ -1001,21 +1089,23 @@ // Create a select whose true value is the speculatively executed value and // false value is the previously determined FalseV. + IRBuilder<true, NoFolder> Builder(BI); SelectInst *SI; if (Invert) - SI = SelectInst::Create(BrCond, FalseV, HInst, - BI); + SI = cast<SelectInst> + (Builder.CreateSelect(BrCond, FalseV, HInst, + FalseV->getName() + "." + HInst->getName())); else - SI = SelectInst::Create(BrCond, HInst, FalseV, - BI); + SI = cast<SelectInst> + (Builder.CreateSelect(BrCond, HInst, FalseV, + HInst->getName() + "." + FalseV->getName())); // Make the PHI node use the select for all incoming values for "then" and // "if" blocks. for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) { PHINode *PN = PHIUses[i]; for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j) - if (PN->getIncomingBlock(j) == BB1 || - PN->getIncomingBlock(j) == BIParent) + if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent) PN->setIncomingValue(j, SI); } @@ -1030,7 +1120,7 @@ unsigned Size = 0; for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { - if (ISA_DEBUG_INFO_INTRINSIC(BBI)) + if (isa<DbgInfoIntrinsic>(BBI)) continue; if (Size > 10) return false; // Don't clone large BB's. 
++Size; @@ -1053,7 +1143,7 @@ /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -1073,77 +1163,75 @@ // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - ConstantInt *CB; - if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && - CB->getType()->isIntegerTy(1)) { - // Okay, we now know that all edges from PredBB should be revectored to - // branch to RealDest. - BasicBlock *PredBB = PN->getIncomingBlock(i); - BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - - if (RealDest == BB) continue; // Skip self loops. - - // The dest block might have PHI nodes, other predecessors and other - // difficult cases. Instead of being smart about this, just insert a new - // block that jumps to the destination block, effectively splitting - // the edge we are about to create. - BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), - RealDest->getParent(), RealDest); - BranchInst::Create(RealDest, EdgeBB); - PHINode *PN; - for (BasicBlock::iterator BBI = RealDest->begin(); - (PN = dyn_cast<PHINode>(BBI)); ++BBI) { - Value *V = PN->getIncomingValueForBlock(BB); - PN->addIncoming(V, EdgeBB); - } + ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); + if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; + + // Okay, we now know that all edges from PredBB should be revectored to + // branch to RealDest. 
+ BasicBlock *PredBB = PN->getIncomingBlock(i); + BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); + + if (RealDest == BB) continue; // Skip self loops. + // Skip if the predecessor's terminator is an indirect branch. + if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; + + // The dest block might have PHI nodes, other predecessors and other + // difficult cases. Instead of being smart about this, just insert a new + // block that jumps to the destination block, effectively splitting + // the edge we are about to create. + BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), + RealDest->getName()+".critedge", + RealDest->getParent(), RealDest); + BranchInst::Create(RealDest, EdgeBB); + + // Update PHI nodes. + AddPredecessorToBlock(RealDest, EdgeBB, BB); - // BB may have instructions that are being threaded over. Clone these - // instructions into EdgeBB. We know that there will be no uses of the - // cloned instructions outside of EdgeBB. - BasicBlock::iterator InsertPt = EdgeBB->begin(); - std::map<Value*, Value*> TranslateMap; // Track translated values. - for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { - if (PHINode *PN = dyn_cast<PHINode>(BBI)) { - TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); - } else { - // Clone the instruction. - Instruction *N = BBI->clone(); - if (BBI->hasName()) N->setName(BBI->getName()+".c"); - - // Update operands due to translation. - for (User::op_iterator i = N->op_begin(), e = N->op_end(); - i != e; ++i) { - std::map<Value*, Value*>::iterator PI = - TranslateMap.find(*i); - if (PI != TranslateMap.end()) - *i = PI->second; - } - - // Check for trivial simplification. - if (Constant *C = ConstantFoldInstruction(N)) { - TranslateMap[BBI] = C; - delete N; // Constant folded away, don't need actual inst - } else { - // Insert the new instruction into its new home. 
- EdgeBB->getInstList().insert(InsertPt, N); - if (!BBI->use_empty()) - TranslateMap[BBI] = N; - } - } + // BB may have instructions that are being threaded over. Clone these + // instructions into EdgeBB. We know that there will be no uses of the + // cloned instructions outside of EdgeBB. + BasicBlock::iterator InsertPt = EdgeBB->begin(); + DenseMap<Value*, Value*> TranslateMap; // Track translated values. + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); + continue; } - - // Loop over all of the edges from PredBB to BB, changing them to branch - // to EdgeBB instead. - TerminatorInst *PredBBTI = PredBB->getTerminator(); - for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) - if (PredBBTI->getSuccessor(i) == BB) { - BB->removePredecessor(PredBB); - PredBBTI->setSuccessor(i, EdgeBB); - } + // Clone the instruction. + Instruction *N = BBI->clone(); + if (BBI->hasName()) N->setName(BBI->getName()+".c"); - // Recurse, simplifying any other constants. - return FoldCondBranchOnPHI(BI) | true; + // Update operands due to translation. + for (User::op_iterator i = N->op_begin(), e = N->op_end(); + i != e; ++i) { + DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i); + if (PI != TranslateMap.end()) + *i = PI->second; + } + + // Check for trivial simplification. + if (Value *V = SimplifyInstruction(N, TD)) { + TranslateMap[BBI] = V; + delete N; // Instruction folded away, don't need actual inst + } else { + // Insert the new instruction into its new home. + EdgeBB->getInstList().insert(InsertPt, N); + if (!BBI->use_empty()) + TranslateMap[BBI] = N; + } } + + // Loop over all of the edges from PredBB to BB, changing them to branch + // to EdgeBB instead. 
+ TerminatorInst *PredBBTI = PredBB->getTerminator(); + for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) + if (PredBBTI->getSuccessor(i) == BB) { + BB->removePredecessor(PredBB); + PredBBTI->setSuccessor(i, EdgeBB); + } + + // Recurse, simplifying any other constants. + return FoldCondBranchOnPHI(BI, TD) | true; } return false; @@ -1151,18 +1239,20 @@ /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN) { +static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which // subsequently causes this merge to happen. We really want control // dependence information for this check, but simplifycfg can't keep it up // to date, and this catches most of the cases we care about anyway. - // BasicBlock *BB = PN->getParent(); BasicBlock *IfTrue, *IfFalse; Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse); - if (!IfCond) return false; + if (!IfCond || + // Don't bother if the branch will be constant folded trivially. + isa<ConstantInt>(IfCond)) + return false; // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. @@ -1174,43 +1264,54 @@ if (NumPhis > 2) return false; - DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " - << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); - // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. 
- std::set<Instruction*> AggressiveInsts; + SmallPtrSet<Instruction*, 4> AggressiveInsts; + unsigned MaxCostVal0 = PHINodeFoldingThreshold, + MaxCostVal1 = PHINodeFoldingThreshold; - BasicBlock::iterator AfterPHIIt = BB->begin(); - while (isa<PHINode>(AfterPHIIt)) { - PHINode *PN = cast<PHINode>(AfterPHIIt++); - if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) { - if (PN->getIncomingValue(0) != PN) - PN->replaceAllUsesWith(PN->getIncomingValue(0)); - else - PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB, - &AggressiveInsts) || - !DominatesMergePoint(PN->getIncomingValue(1), BB, - &AggressiveInsts)) { - return false; + for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { + PHINode *PN = cast<PHINode>(II++); + if (Value *V = SimplifyInstruction(PN, TD)) { + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + continue; } + + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, + MaxCostVal0) || + !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, + MaxCostVal1)) + return false; } + // If we folded the the first phi, PN dangles at this point. Refresh it. If + // we ran out of PHIs then we simplified them all. + PN = dyn_cast<PHINode>(BB->begin()); + if (PN == 0) return true; + + // Don't fold i1 branches on PHIs which contain binary operators. These can + // often be turned into switches and other things. + if (PN->getType()->isIntegerTy(1) && + (isa<BinaryOperator>(PN->getIncomingValue(0)) || + isa<BinaryOperator>(PN->getIncomingValue(1)) || + isa<BinaryOperator>(IfCond))) + return false; + // If we all PHI nodes are promotable, check to make sure that all // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not // worth promoting to select instructions. 
- BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0; - PN = cast<PHINode>(BB->begin()); - BasicBlock *Pred = PN->getIncomingBlock(0); - if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) { - IfBlock1 = Pred; - DomBlock = *pred_begin(Pred); - for (BasicBlock::iterator I = Pred->begin(); - !isa<TerminatorInst>(I); ++I) - if (!AggressiveInsts.count(I) && !ISA_DEBUG_INFO_INTRINSIC(I)) { + BasicBlock *DomBlock = 0; + BasicBlock *IfBlock1 = PN->getIncomingBlock(0); + BasicBlock *IfBlock2 = PN->getIncomingBlock(1); + if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) { + IfBlock1 = 0; + } else { + DomBlock = *pred_begin(IfBlock1); + for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I) + if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control // flow, so the xform is not worth it. @@ -1218,73 +1319,65 @@ } } - Pred = PN->getIncomingBlock(1); - if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) { - IfBlock2 = Pred; - DomBlock = *pred_begin(Pred); - for (BasicBlock::iterator I = Pred->begin(); - !isa<TerminatorInst>(I); ++I) - if (!AggressiveInsts.count(I) && !ISA_DEBUG_INFO_INTRINSIC(I)) { + if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { + IfBlock2 = 0; + } else { + DomBlock = *pred_begin(IfBlock2); + for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I) + if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control // flow, so the xform is not worth it. return false; } } + + DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " + << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. 
- + Instruction *InsertPt = DomBlock->getTerminator(); + IRBuilder<true, NoFolder> Builder(InsertPt); + // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. - if (IfBlock1) { - DomBlock->getInstList().splice(DomBlock->getTerminator(), - IfBlock1->getInstList(), - IfBlock1->begin(), + if (IfBlock1) + DomBlock->getInstList().splice(InsertPt, + IfBlock1->getInstList(), IfBlock1->begin(), IfBlock1->getTerminator()); - } - if (IfBlock2) { - DomBlock->getInstList().splice(DomBlock->getTerminator(), - IfBlock2->getInstList(), - IfBlock2->begin(), + if (IfBlock2) + DomBlock->getInstList().splice(InsertPt, + IfBlock2->getInstList(), IfBlock2->begin(), IfBlock2->getTerminator()); - } while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { // Change the PHI node into a select instruction. - Value *TrueVal = - PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); - Value *FalseVal = - PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); + Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); + Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, AfterPHIIt); + SelectInst *NV = + cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); PN->replaceAllUsesWith(NV); NV->takeName(PN); - - BB->getInstList().erase(PN); + PN->eraseFromParent(); } + + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement + // has been flattened. Change DomBlock to jump directly to our new block to + // avoid other simplifycfg's kicking in on the diamond. + TerminatorInst *OldTI = DomBlock->getTerminator(); + Builder.SetInsertPoint(OldTI); + Builder.CreateBr(BB); + OldTI->eraseFromParent(); return true; } -/// isTerminatorFirstRelevantInsn - Return true if Term is very first -/// instruction ignoring Phi nodes and dbg intrinsics. 
-static bool isTerminatorFirstRelevantInsn(BasicBlock *BB, Instruction *Term) { - BasicBlock::iterator BBI = Term; - while (BBI != BB->begin()) { - --BBI; - if (!ISA_DEBUG_INFO_INTRINSIC(BBI)) - break; - } - - if (isa<PHINode>(BBI) || &*BBI == Term || ISA_DEBUG_INFO_INTRINSIC(BBI)) - return true; - return false; -} - /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes /// to two returning blocks, try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, + IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); @@ -1294,18 +1387,19 @@ // Check to ensure both blocks are empty (just a return) or optionally empty // with PHI nodes. If there are other instructions, merging would cause extra // computation on one path or the other. - if (!isTerminatorFirstRelevantInsn(TrueSucc, TrueRet)) + if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator()) return false; - if (!isTerminatorFirstRelevantInsn(FalseSucc, FalseRet)) + if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator()) return false; + Builder.SetInsertPoint(BI); // Okay, we found a branch that is going to two return nodes. If // there is no return value for this function, just change the // branch into a return. 
if (FalseRet->getNumOperands() == 0) { TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - ReturnInst::Create(BI->getContext(), 0, BI); + Builder.CreateRetVoid(); EraseTerminatorInstAndDCECond(BI); return true; } @@ -1348,14 +1442,14 @@ } else if (isa<UndefValue>(TrueValue)) { TrueValue = FalseValue; } else { - TrueValue = SelectInst::Create(BrCond, TrueValue, - FalseValue, BI); + TrueValue = Builder.CreateSelect(BrCond, TrueValue, + FalseValue, "retval"); } } - Value *RI = !TrueValue ? - ReturnInst::Create(BI->getContext(), BI) : - ReturnInst::Create(BI->getContext(), TrueValue, BI); + Value *RI = !TrueValue ? + Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); + (void) RI; DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" @@ -1367,24 +1461,24 @@ return true; } -/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch, -/// and if a predecessor branches to us and one of our successors, fold the -/// setcc into the predecessor and use logical operations to pick the right -/// destination. +/// FoldBranchToCommonDest - If this basic block is simple enough, and if a +/// predecessor branches to us and one of our successors, fold the block into +/// the predecessor and use logical operations to pick the right destination. bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; - + // Only allow this if the condition is a simple instruction that can be // executed unconditionally. It must be in the same block as the branch, and // must be at the front of the block. BasicBlock::iterator FrontIt = BB->front(); + // Ignore dbg intrinsics. 
- while(ISA_DEBUG_INFO_INTRINSIC(FrontIt)) - ++FrontIt; + while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; // Allow a single instruction to be hoisted in addition to the compare // that feeds the branch. We later ensure that any values that _it_ uses @@ -1396,21 +1490,23 @@ FrontIt->isSafeToSpeculativelyExecute()) { BonusInst = &*FrontIt; ++FrontIt; + + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; } - + // Only a single bonus inst is allowed. if (&*FrontIt != Cond) return false; // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; + // Ingore dbg intrinsics. - while(ISA_DEBUG_INFO_INTRINSIC(CondIt)) - ++CondIt; - if (&*CondIt != BI) { - assert (!ISA_DEBUG_INFO_INTRINSIC(CondIt) && "Hey do not forget debug info!"); + while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; + + if (&*CondIt != BI) return false; - } // Cond is known to be a compare or binary operator. Check to make sure that // neither operand is a potentially-trapping constant expression. @@ -1421,13 +1517,12 @@ if (CE->canTrap()) return false; - // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = BI->getSuccessor(1); if (TrueDest == BB || FalseDest == BB) return false; - + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); @@ -1435,10 +1530,24 @@ // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. - if (PBI == 0 || PBI->isUnconditional() || - !SafeToMergeTerminators(BI, PBI)) + if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) continue; + // Determine if the two branches share a common destination. 
+ Instruction::BinaryOps Opc; + bool InvertPredCond = false; + + if (PBI->getSuccessor(0) == TrueDest) + Opc = Instruction::Or; + else if (PBI->getSuccessor(1) == FalseDest) + Opc = Instruction::And; + else if (PBI->getSuccessor(0) == FalseDest) + Opc = Instruction::And, InvertPredCond = true; + else if (PBI->getSuccessor(1) == TrueDest) + Opc = Instruction::Or, InvertPredCond = true; + else + continue; + // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values // must already have been resolved, so we won't be inhibiting the @@ -1467,7 +1576,7 @@ UsedValues.erase(Pair.first); if (UsedValues.empty()) break; - if (Instruction* I = dyn_cast<Instruction>(Pair.first)) { + if (Instruction *I = dyn_cast<Instruction>(Pair.first)) { for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); @@ -1476,28 +1585,22 @@ if (!UsedValues.empty()) return false; } - - Instruction::BinaryOps Opc; - bool InvertPredCond = false; - - if (PBI->getSuccessor(0) == TrueDest) - Opc = Instruction::Or; - else if (PBI->getSuccessor(1) == FalseDest) - Opc = Instruction::And; - else if (PBI->getSuccessor(0) == FalseDest) - Opc = Instruction::And, InvertPredCond = true; - else if (PBI->getSuccessor(1) == TrueDest) - Opc = Instruction::Or, InvertPredCond = true; - else - continue; DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - + IRBuilder<> Builder(PBI); + // If we need to invert the condition in the pred block to match, do so now. 
if (InvertPredCond) { - Value *NewCond = - BinaryOperator::CreateNot(PBI->getCondition(), - PBI); + Value *NewCond = PBI->getCondition(); + + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { + CmpInst *CI = cast<CmpInst>(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = Builder.CreateNot(NewCond, + PBI->getCondition()->getName()+".not"); + } + PBI->setCondition(NewCond); BasicBlock *OldTrue = PBI->getSuccessor(0); BasicBlock *OldFalse = PBI->getSuccessor(1); @@ -1522,8 +1625,9 @@ New->takeName(Cond); Cond->setName(New->getName()+".old"); - Value *NewCond = BinaryOperator::Create(Opc, PBI->getCondition(), - New, PBI); + Instruction *NewCond = + cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(), + New, "or.cond")); PBI->setCondition(NewCond); if (PBI->getSuccessor(0) == BB) { AddPredecessorToBlock(TrueDest, PredBlock, BB); @@ -1533,6 +1637,12 @@ AddPredecessorToBlock(FalseDest, PredBlock, BB); PBI->setSuccessor(1, FalseDest); } + + // Copy any debug value intrinsics into the end of PredBlock. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (isa<DbgInfoIntrinsic>(*I)) + I->clone()->insertBefore(PBI); + return true; } return false; @@ -1565,12 +1675,15 @@ // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. if (BlockIsSimpleEnoughToThreadThrough(BB)) { + pred_iterator PB = pred_begin(BB), PE = pred_end(BB); PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()), + std::distance(PB, PE), + BI->getCondition()->getName() + ".pr", BB->begin()); // Okay, we're going to insert the PHI node. Since PBI is not the only // predecessor, compute the PHI'd conditional value for all of the preds. // Any predecessor where the condition is not computable we keep symbolic. 
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI && PBI->isConditional() && @@ -1594,7 +1707,7 @@ // fold the conditions into logical ops and one cond br. BasicBlock::iterator BBI = BB->begin(); // Ignore dbg intrinsics. - while (ISA_DEBUG_INFO_INTRINSIC(BBI)) + while (isa<DbgInfoIntrinsic>(BBI)) ++BBI; if (&*BBI != BI) return false; @@ -1651,27 +1764,28 @@ // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), - BB->getParent()); + "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; } DEBUG(dbgs() << *PBI->getParent()->getParent()); - + // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); + IRBuilder<true, NoFolder> Builder(PBI); if (PBIOp) - PBICond = BinaryOperator::CreateNot(PBICond, - PBI); + PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not"); + Value *BICond = BI->getCondition(); if (BIOp) - BICond = BinaryOperator::CreateNot(BICond, - PBI); + BICond = Builder.CreateNot(BICond, BICond->getName()+".not"); + // Merge the conditions. - Value *Cond = BinaryOperator::CreateOr(PBICond, BICond, PBI); + Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); // Modify PBI to branch on the new condition to the new dests. PBI->setCondition(Cond); @@ -1680,17 +1794,13 @@ // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. 
- PHINode *PN; - for (BasicBlock::iterator II = OtherDest->begin(); - (PN = dyn_cast<PHINode>(II)); ++II) { - Value *V = PN->getIncomingValueForBlock(BB); - PN->addIncoming(V, PBI->getParent()); - } + AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make // them agree. + PHINode *PN; for (BasicBlock::iterator II = CommonDest->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { Value *BIV = PN->getIncomingValueForBlock(BB); @@ -1698,8 +1808,8 @@ Value *PBIV = PN->getIncomingValue(PBBIdx); if (BIV != PBIV) { // Insert a select in PBI to pick the right value. - Value *NV = SelectInst::Create(PBICond, PBIV, BIV, - PBI); + Value *NV = cast<SelectInst> + (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux")); PN->setIncomingValue(PBBIdx, NV); } } @@ -1712,18 +1822,1030 @@ return true; } +// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a +// branch to TrueBB if Cond is true or to FalseBB if Cond is false. +// Takes care of updating the successors and removing the old terminator. +// Also makes sure not to introduce new successors by assuming that edges to +// non-successor TrueBBs and FalseBBs aren't reachable. +static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, + BasicBlock *TrueBB, BasicBlock *FalseBB){ + // Remove any superfluous successor edges from the CFG. + // First, figure out which successors to preserve. + // If TrueBB and FalseBB are equal, only try to preserve one copy of that + // successor. + BasicBlock *KeepEdge1 = TrueBB; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0; + + // Then remove the rest. + for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { + BasicBlock *Succ = OldTerm->getSuccessor(I); + // Make sure only to keep exactly one copy of each edge. 
+ if (Succ == KeepEdge1) + KeepEdge1 = 0; + else if (Succ == KeepEdge2) + KeepEdge2 = 0; + else + Succ->removePredecessor(OldTerm->getParent()); + } + + IRBuilder<> Builder(OldTerm); + Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); + + // Insert an appropriate new terminator. + if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) { + if (TrueBB == FalseBB) + // We were only looking for one successor, and it was present. + // Create an unconditional branch to it. + Builder.CreateBr(TrueBB); + else + // We found both of the successors we were looking for. + // Create a conditional branch sharing the condition of the select. + Builder.CreateCondBr(Cond, TrueBB, FalseBB); + } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { + // Neither of the selected blocks were successors, so this + // terminator must be unreachable. + new UnreachableInst(OldTerm->getContext(), OldTerm); + } else { + // One of the selected values was a successor, but the other wasn't. + // Insert an unconditional branch to the one that was found; + // the edge to the one that wasn't must be unreachable. + if (KeepEdge1 == 0) + // Only TrueBB was found. + Builder.CreateBr(TrueBB); + else + // Only FalseBB was found. + Builder.CreateBr(FalseBB); + } + + EraseTerminatorInstAndDCECond(OldTerm); + return true; +} + +// SimplifySwitchOnSelect - Replaces +// (switch (select cond, X, Y)) on constant X, Y +// with a branch - conditional if X and Y lead to distinct BBs, +// unconditional otherwise. +static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { + // Check for constant integer values in the select. + ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue()); + ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue()); + if (!TrueVal || !FalseVal) + return false; + + // Find the relevant condition and destinations. 
+ Value *Condition = Select->getCondition(); + BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal)); + BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal)); + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); +} + +// SimplifyIndirectBrOnSelect - Replaces +// (indirectbr (select cond, blockaddress(@fn, BlockA), +// blockaddress(@fn, BlockB))) +// with +// (br cond, BlockA, BlockB). +static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { + // Check that both operands of the select are block addresses. + BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue()); + BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue()); + if (!TBA || !FBA) + return false; + + // Extract the actual blocks. + BasicBlock *TrueBB = TBA->getBasicBlock(); + BasicBlock *FalseBB = FBA->getBasicBlock(); + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB); +} + +/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp +/// instruction (a seteq/setne with a constant) as the only instruction in a +/// block that ends with an uncond branch. We are looking for a very specific +/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In +/// this case, we merge the first two "or's of icmp" into a switch, but then the +/// default value goes to an uncond block with a seteq in it, we get something +/// like: +/// +/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] +/// DEFAULT: +/// %tmp = icmp eq i8 %A, 92 +/// br label %end +/// end: +/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] +/// +/// We prefer to split the edge to 'end' so that there is a true/false entry to +/// the PHI, merging the third icmp into the switch. 
+static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, + const TargetData *TD, + IRBuilder<> &Builder) { + BasicBlock *BB = ICI->getParent(); + + // If the block has any PHIs in it or the icmp has multiple uses, it is too + // complex. + if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false; + + Value *V = ICI->getOperand(0); + ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); + + // The pattern we're looking for is where our only predecessor is a switch on + // 'V' and this block is the default case for the switch. In this case we can + // fold the compared value into the switch to simplify things. + BasicBlock *Pred = BB->getSinglePredecessor(); + if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; + + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); + if (SI->getCondition() != V) + return false; + + // If BB is reachable on a non-default case, then we simply know the value of + // V in this block. Substitute it and constant fold the icmp instruction + // away. + if (SI->getDefaultDest() != BB) { + ConstantInt *VVal = SI->findCaseDest(BB); + assert(VVal && "Should have a unique destination value"); + ICI->setOperand(0, VVal); + + if (Value *V = SimplifyInstruction(ICI, TD)) { + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + } + // BB is now empty, so it is likely to simplify away. + return SimplifyCFG(BB) | true; + } + + // Ok, the block is reachable from the default dest. If the constant we're + // comparing exists in one of the other edges, then we can constant fold ICI + // and zap it. + if (SI->findCaseValue(Cst) != 0) { + Value *V; + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + V = ConstantInt::getFalse(BB->getContext()); + else + V = ConstantInt::getTrue(BB->getContext()); + + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + // BB is now empty, so it is likely to simplify away. 
+ return SimplifyCFG(BB) | true; + } + + // The use of the icmp has to be in the 'end' block, by the only PHI node in + // the block. + BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); + PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back()); + if (PHIUse == 0 || PHIUse != &SuccBlock->front() || + isa<PHINode>(++BasicBlock::iterator(PHIUse))) + return false; + + // If the icmp is a SETEQ, then the default dest gets false, the new edge gets + // true in the PHI. + Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); + Constant *NewCst = ConstantInt::getFalse(BB->getContext()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(DefaultCst, NewCst); + + // Replace ICI (which is used by the PHI for the default value) with true or + // false depending on if it is EQ or NE. + ICI->replaceAllUsesWith(DefaultCst); + ICI->eraseFromParent(); + + // Okay, the switch goes to this block on a default value. Add an edge from + // the switch to the merge point on the compared value. + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", + BB->getParent(), BB); + SI->addCase(Cst, NewBB); + + // NewBB branches to the phi block, add the uncond branch and the phi entry. + Builder.SetInsertPoint(NewBB); + Builder.SetCurrentDebugLocation(SI->getDebugLoc()); + Builder.CreateBr(SuccBlock); + PHIUse->addIncoming(NewCst, NewBB); + return true; +} + +/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. +/// Check to see if it is branching on an or/and chain of icmp instructions, and +/// fold it into a switch instruction if so. +static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, + IRBuilder<> &Builder) { + Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); + if (Cond == 0) return false; + + + // Change br (X == 0 | X == 1), T, F into a switch instruction. + // If this is a bunch of seteq's or'd together, or if it's a bunch of + // 'setne's and'ed together, collect them. 
+ Value *CompVal = 0; + std::vector<ConstantInt*> Values; + bool TrueWhenEqual = true; + Value *ExtraCase = 0; + unsigned UsedICmps = 0; + + if (Cond->getOpcode() == Instruction::Or) { + CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, + UsedICmps); + } else if (Cond->getOpcode() == Instruction::And) { + CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false, + UsedICmps); + TrueWhenEqual = false; + } + + // If we didn't have a multiply compared value, fail. + if (CompVal == 0) return false; + + // Avoid turning single icmps into a switch. + if (UsedICmps <= 1) + return false; + + // There might be duplicate constants in the list, which the switch + // instruction can't handle, remove them now. + array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); + Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); + + // If Extra was used, we require at least two switch values to do the + // transformation. A switch with one value is just an cond branch. + if (ExtraCase && Values.size() < 2) return false; + + // Figure out which block is which destination. + BasicBlock *DefaultBB = BI->getSuccessor(1); + BasicBlock *EdgeBB = BI->getSuccessor(0); + if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); + + BasicBlock *BB = BI->getParent(); + + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() + << " cases into SWITCH. BB is:\n" << *BB); + + // If there are any extra values that couldn't be folded into the switch + // then we evaluate them with an explicit branch first. Split the block + // right before the condbr to handle it. + if (ExtraCase) { + BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test"); + // Remove the uncond branch added to the old block. 
+ TerminatorInst *OldTI = BB->getTerminator(); + Builder.SetInsertPoint(OldTI); + + if (TrueWhenEqual) + Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); + else + Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); + + OldTI->eraseFromParent(); + + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them + // for the edge we just added. + AddPredecessorToBlock(EdgeBB, BB, NewBB); + + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase + << "\nEXTRABB = " << *BB); + BB = NewBB; + } + + Builder.SetInsertPoint(BI); + // Convert pointer to int before we switch. + if (CompVal->getType()->isPointerTy()) { + assert(TD && "Cannot switch on pointer without TargetData"); + CompVal = Builder.CreatePtrToInt(CompVal, + TD->getIntPtrType(CompVal->getContext()), + "magicptr"); + } + + // Create the new switch instruction now. + SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); + + // Add all of the 'cases' to the switch instruction. + for (unsigned i = 0, e = Values.size(); i != e; ++i) + New->addCase(Values[i], EdgeBB); + + // We added edges from PI to the EdgeBB. As such, if there were any + // PHI nodes in EdgeBB, they need entries to be added corresponding to + // the number of edges added. + for (BasicBlock::iterator BBI = EdgeBB->begin(); + isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + Value *InVal = PN->getIncomingValueForBlock(BB); + for (unsigned i = 0, e = Values.size()-1; i != e; ++i) + PN->addIncoming(InVal, BB); + } + + // Erase the old branch instruction. + EraseTerminatorInstAndDCECond(BI); + + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); + return true; +} + +bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { + // If this is a trivial landing pad that just continues unwinding the caught + // exception then zap the landing pad, turning its invokes into calls. 
+ BasicBlock *BB = RI->getParent(); + LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); + if (RI->getValue() != LPInst) + // Not a landing pad, or the resume is not unwinding the exception that + // caused control to branch here. + return false; + + // Check that there are no other instructions except for debug intrinsics. + BasicBlock::iterator I = LPInst, E = RI; + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + // Turn all invokes that unwind here into calls and delete the basic block. + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { + InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + // Insert a call instruction before the invoke. + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); + Call->takeName(II); + Call->setCallingConv(II->getCallingConv()); + Call->setAttributes(II->getAttributes()); + Call->setDebugLoc(II->getDebugLoc()); + + // Anything that used the value produced by the invoke instruction now uses + // the value produced by the call instruction. Note that we do this even + // for void functions and calls with no uses so that the callgraph edge is + // updated. + II->replaceAllUsesWith(Call); + BB->removePredecessor(II->getParent()); + + // Insert a branch to the normal destination right before the invoke. + BranchInst::Create(II->getNormalDest(), II); + + // Finally, delete the invoke instruction! + II->eraseFromParent(); + } + + // The landingpad is now unreachable. Zap it. + BB->eraseFromParent(); + return true; +} + +bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { + BasicBlock *BB = RI->getParent(); + if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; + + // Find predecessors that end with branches. 
+ SmallVector<BasicBlock*, 8> UncondBranchPreds; + SmallVector<BranchInst*, 8> CondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + TerminatorInst *PTI = P->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { + if (BI->isUnconditional()) + UncondBranchPreds.push_back(P); + else + CondBranchPreds.push_back(BI); + } + } + + // If we found some, do the transformation! + if (!UncondBranchPreds.empty() && DupRet) { + while (!UncondBranchPreds.empty()) { + BasicBlock *Pred = UncondBranchPreds.pop_back_val(); + DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); + (void)FoldReturnIntoUncondBranch(RI, BB, Pred); + } + + // If we eliminated all predecessors of the block, delete the block now. + if (pred_begin(BB) == pred_end(BB)) + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + + return true; + } + + // Check out all of the conditional branches going to this return + // instruction. If any of them just select between returns, change the + // branch itself into a select/return pair. + while (!CondBranchPreds.empty()) { + BranchInst *BI = CondBranchPreds.pop_back_val(); + + // Check to see if the non-BB successor is also a return block. + if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && + isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && + SimplifyCondBranchToTwoReturns(BI, Builder)) + return true; + } + return false; +} + +bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) { + // Check to see if the first instruction in this block is just an unwind. + // If so, replace any invoke instructions which use this as an exception + // destination with call instructions. 
+ BasicBlock *BB = UI->getParent(); + if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; + + bool Changed = false; + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.back(); + InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()); + if (II && II->getUnwindDest() == BB) { + // Insert a new branch instruction before the invoke, because this + // is now a fall through. + Builder.SetInsertPoint(II); + BranchInst *BI = Builder.CreateBr(II->getNormalDest()); + Pred->getInstList().remove(II); // Take out of symbol table + + // Insert the call now. + SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3); + Builder.SetInsertPoint(BI); + CallInst *CI = Builder.CreateCall(II->getCalledValue(), + Args, II->getName()); + CI->setCallingConv(II->getCallingConv()); + CI->setAttributes(II->getAttributes()); + // If the invoke produced a value, the Call now does instead. + II->replaceAllUsesWith(CI); + delete II; + Changed = true; + } + + Preds.pop_back(); + } + + // If this block is now dead (and isn't the entry block), remove it. + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) { + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + return true; + } + + return Changed; +} + +bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { + BasicBlock *BB = UI->getParent(); + + bool Changed = false; + + // If there are any instructions immediately before the unreachable that can + // be removed, do so. + while (UI != BB->begin()) { + BasicBlock::iterator BBI = UI; + --BBI; + // Do not delete instructions that can have side effects which might cause + // the unreachable to not be reachable; specifically, calls and volatile + // operations may have this effect. 
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break; + + if (BBI->mayHaveSideEffects()) { + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (SI->isVolatile()) + break; + } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (LI->isVolatile()) + break; + } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { + if (RMWI->isVolatile()) + break; + } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { + if (CXI->isVolatile()) + break; + } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) && + !isa<LandingPadInst>(BBI)) { + break; + } + // Note that deleting LandingPad's here is in fact okay, although it + // involves a bit of subtle reasoning. If this inst is a LandingPad, + // all the predecessors of this block will be the unwind edges of Invokes, + // and we can therefore guarantee this block will be erased. + } + + // Delete this instruction (any uses are guaranteed to be dead) + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BBI->eraseFromParent(); + Changed = true; + } + + // If the unreachable instruction is the first in the block, take a gander + // at all of the predecessors of this instruction, and simplify them. 
+ if (&BB->front() != UI) return Changed; + + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + TerminatorInst *TI = Preds[i]->getTerminator(); + IRBuilder<> Builder(TI); + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isUnconditional()) { + if (BI->getSuccessor(0) == BB) { + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; + } + } else { + if (BI->getSuccessor(0) == BB) { + Builder.CreateBr(BI->getSuccessor(1)); + EraseTerminatorInstAndDCECond(BI); + } else if (BI->getSuccessor(1) == BB) { + Builder.CreateBr(BI->getSuccessor(0)); + EraseTerminatorInstAndDCECond(BI); + Changed = true; + } + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + if (SI->getSuccessor(i) == BB) { + BB->removePredecessor(SI->getParent()); + SI->removeCase(i); + --i; --e; + Changed = true; + } + // If the default value is unreachable, figure out the most popular + // destination and make it the default. + if (SI->getSuccessor(0) == BB) { + std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity; + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { + std::pair<unsigned, unsigned>& entry = + Popularity[SI->getSuccessor(i)]; + if (entry.first == 0) { + entry.first = 1; + entry.second = i; + } else { + entry.first++; + } + } + + // Find the most popular block. + unsigned MaxPop = 0; + unsigned MaxIndex = 0; + BasicBlock *MaxBlock = 0; + for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator + I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { + if (I->second.first > MaxPop || + (I->second.first == MaxPop && MaxIndex > I->second.second)) { + MaxPop = I->second.first; + MaxIndex = I->second.second; + MaxBlock = I->first; + } + } + if (MaxBlock) { + // Make this the new default, allowing us to delete any explicit + // edges to it. 
+ SI->setSuccessor(0, MaxBlock); + Changed = true; + + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from + // it. + if (isa<PHINode>(MaxBlock->begin())) + for (unsigned i = 0; i != MaxPop-1; ++i) + MaxBlock->removePredecessor(SI->getParent()); + + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + if (SI->getSuccessor(i) == MaxBlock) { + SI->removeCase(i); + --i; --e; + } + } + } + } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) { + if (II->getUnwindDest() == BB) { + // Convert the invoke to a call instruction. This would be a good + // place to note that the call does not throw though. + BranchInst *BI = Builder.CreateBr(II->getNormalDest()); + II->removeFromParent(); // Take out of symbol table + + // Insert the call now... + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3); + Builder.SetInsertPoint(BI); + CallInst *CI = Builder.CreateCall(II->getCalledValue(), + Args, II->getName()); + CI->setCallingConv(II->getCallingConv()); + CI->setAttributes(II->getAttributes()); + // If the invoke produced a value, the call does now instead. + II->replaceAllUsesWith(CI); + delete II; + Changed = true; + } + } + } + + // If this block is now dead, remove it. + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) { + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + return true; + } + + return Changed; +} + +/// TurnSwitchRangeIntoICmp - Turns a switch that contains only an +/// integer range comparison into a sub, an icmp and a branch. +static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { + assert(SI->getNumCases() > 2 && "Degenerate switch?"); + + // Make sure all cases point to the same destination and gather the values. 
+ SmallVector<ConstantInt *, 16> Cases; + Cases.push_back(SI->getCaseValue(1)); + for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) { + if (SI->getSuccessor(I-1) != SI->getSuccessor(I)) + return false; + Cases.push_back(SI->getCaseValue(I)); + } + assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered"); + + // Sort the case values, then check if they form a range we can transform. + array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); + for (unsigned I = 1, E = Cases.size(); I != E; ++I) { + if (Cases[I-1]->getValue() != Cases[I]->getValue()+1) + return false; + } + + Constant *Offset = ConstantExpr::getNeg(Cases.back()); + Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1); + + Value *Sub = SI->getCondition(); + if (!Offset->isNullValue()) + Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); + Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); + Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest()); + + // Prune obsolete incoming values off the successor's PHI nodes. + for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin(); + isa<PHINode>(BBI); ++BBI) { + for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I) + cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + } + SI->eraseFromParent(); + + return true; +} + +/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch +/// and use it to remove dead cases. +static bool EliminateDeadSwitchCases(SwitchInst *SI) { + Value *Cond = SI->getCondition(); + unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth(); + APInt KnownZero(Bits, 0), KnownOne(Bits, 0); + ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne); + + // Gather dead cases. 
+ SmallVector<ConstantInt*, 8> DeadCases; + for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) { + if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 || + (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) { + DeadCases.push_back(SI->getCaseValue(I)); + DEBUG(dbgs() << "SimplifyCFG: switch case '" + << SI->getCaseValue(I)->getValue() << "' is dead.\n"); + } + } + + // Remove dead cases from the switch. + for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) { + unsigned Case = SI->findCaseValue(DeadCases[I]); + // Prune unused values from PHI nodes. + SI->getSuccessor(Case)->removePredecessor(SI->getParent()); + SI->removeCase(Case); + } + + return !DeadCases.empty(); +} + +/// FindPHIForConditionForwarding - If BB would be eligible for simplification +/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated +/// by an unconditional branch), look at the phi node for BB in the successor +/// block and see if the incoming value is equal to CaseValue. If so, return +/// the phi node, and set PhiIndex to BB's index in the phi node. +static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, + BasicBlock *BB, + int *PhiIndex) { + if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) + return NULL; // BB must be empty to be a candidate for simplification. + if (!BB->getSinglePredecessor()) + return NULL; // BB must be dominated by the switch. + + BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator()); + if (!Branch || !Branch->isUnconditional()) + return NULL; // Terminator must be unconditional branch. 
+ + BasicBlock *Succ = Branch->getSuccessor(0); + + BasicBlock::iterator I = Succ->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(BB); + assert(Idx >= 0 && "PHI has no entry for predecessor?"); + + Value *InValue = PHI->getIncomingValue(Idx); + if (InValue != CaseValue) continue; + + *PhiIndex = Idx; + return PHI; + } + + return NULL; +} + +/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch +/// instruction to a phi node dominated by the switch, if that would mean that +/// some of the destination blocks of the switch can be folded away. +/// Returns true if a change is made. +static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { + typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap; + ForwardingNodesMap ForwardingNodes; + + for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case. + ConstantInt *CaseValue = SI->getCaseValue(I); + BasicBlock *CaseDest = SI->getSuccessor(I); + + int PhiIndex; + PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest, + &PhiIndex); + if (!PHI) continue; + + ForwardingNodes[PHI].push_back(PhiIndex); + } + + bool Changed = false; + + for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), + E = ForwardingNodes.end(); I != E; ++I) { + PHINode *Phi = I->first; + SmallVector<int,4> &Indexes = I->second; + + if (Indexes.size() < 2) continue; + + for (size_t I = 0, E = Indexes.size(); I != E; ++I) + Phi->setIncomingValue(Indexes[I], SI->getCondition()); + Changed = true; + } + + return Changed; +} + +bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { + // If this switch is too complex to want to look at, ignore it. + if (!isValueEqualityComparison(SI)) + return false; + + BasicBlock *BB = SI->getParent(); + + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. 
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return SimplifyCFG(BB) | true; + + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB) | true; + + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return SimplifyCFG(BB) | true; + + // Try to transform the switch into an icmp and a branch. + if (TurnSwitchRangeIntoICmp(SI, Builder)) + return SimplifyCFG(BB) | true; + + // Remove unreachable cases. + if (EliminateDeadSwitchCases(SI)) + return SimplifyCFG(BB) | true; + + if (ForwardSwitchConditionToPHI(SI)) + return SimplifyCFG(BB) | true; + + return false; +} + +bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + + // Eliminate redundant destinations. + SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { + BasicBlock *Dest = IBI->getDestination(i); + if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) { + Dest->removePredecessor(BB); + IBI->removeDestination(i); + --i; --e; + Changed = true; + } + } + + if (IBI->getNumDestinations() == 0) { + // If the indirectbr has no successors, change it to unreachable. + new UnreachableInst(IBI->getContext(), IBI); + EraseTerminatorInstAndDCECond(IBI); + return true; + } + + if (IBI->getNumDestinations() == 1) { + // If the indirectbr has one successor, change it to a direct branch. 
+ BranchInst::Create(IBI->getDestination(0), IBI); + EraseTerminatorInstAndDCECond(IBI); + return true; + } + + if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { + if (SimplifyIndirectBrOnSelect(IBI, SI)) + return SimplifyCFG(BB) | true; + } + return Changed; +} + +bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ + BasicBlock *BB = BI->getParent(); + + // If the Terminator is the only non-phi instruction, simplify the block. + BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); + if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + TryToSimplifyUncondBranchFromEmptyBlock(BB)) + return true; + + // If the only instruction in the block is a seteq/setne comparison + // against a constant, try to simplify the block. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) + if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; + if (I->isTerminator() + && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) + return true; + } + + return false; +} + + +bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { + BasicBlock *BB = BI->getParent(); + + // Conditional branch + if (isValueEqualityComparison(BI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this + // switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) + return SimplifyCFG(BB) | true; + + // This block must be empty, except for the setcond inst, if it exists. + // Ignore dbg intrinsics. + BasicBlock::iterator I = BB->begin(); + // Ignore dbg intrinsics. 
+ while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI) { + if (FoldValueComparisonIntoPredecessors(BI, Builder)) + return SimplifyCFG(BB) | true; + } else if (&*I == cast<Instruction>(BI->getCondition())){ + ++I; + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) + return SimplifyCFG(BB) | true; + } + } + + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. + if (SimplifyBranchOnICmpChain(BI, TD, Builder)) + return true; + + // We have a conditional branch to two blocks that are only reachable + // from BI. We know that the condbr dominates the two blocks, so see if + // there is any identical code in the "then" and "else" blocks. If so, we + // can hoist it up to the branching block. + if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { + if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + if (HoistThenElseCodeToIf(BI)) + return SimplifyCFG(BB) | true; + } else { + // If Successor #1 has multiple preds, we may be able to conditionally + // execute Successor #0 if it branches to successor #1. + TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); + if (Succ0TI->getNumSuccessors() == 1 && + Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) + return SimplifyCFG(BB) | true; + } + } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + // If Successor #0 has multiple preds, we may be able to conditionally + // execute Successor #1 if it branches to successor #0. + TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); + if (Succ1TI->getNumSuccessors() == 1 && + Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) + return SimplifyCFG(BB) | true; + } + + // If this is a branch on a phi node in the current block, thread control + // through this block if any PHI node entries are constants. 
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) + if (PN->getParent() == BI->getParent()) + if (FoldCondBranchOnPHI(BI, TD)) + return SimplifyCFG(BB) | true; + + // If this basic block is ONLY a setcc and a branch, and if a predecessor + // branches to us and one of our successors, fold the setcc into the + // predecessor and use logical operations to pick the right destination. + if (FoldBranchToCommonDest(BI)) + return SimplifyCFG(BB) | true; + + // Scan predecessor blocks for conditional branches. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (SimplifyCondBranchToCondBranch(PBI, BI)) + return SimplifyCFG(BB) | true; + + return false; +} + +/// Check if passing a value to an instruction will cause undefined behavior. +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { + Constant *C = dyn_cast<Constant>(V); + if (!C) + return false; + + if (!I->hasOneUse()) // Only look at single-use instructions, for compile time + return false; + + if (C->isNullValue()) { + Instruction *Use = I->use_back(); + + // Now make sure that there are no instructions in between that can alter + // control flow (eg. calls) + for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i) + if (i == I->getParent()->end() || i->mayHaveSideEffects()) + return false; + + // Look through GEPs. A load from a GEP derived from NULL is still undefined + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use)) + if (GEP->getPointerOperand() == I) + return passingValueIsAlwaysUndefined(V, GEP); + + // Look through bitcasts. + if (BitCastInst *BC = dyn_cast<BitCastInst>(Use)) + return passingValueIsAlwaysUndefined(V, BC); + + // Load from null is undefined. + if (LoadInst *LI = dyn_cast<LoadInst>(Use)) + return LI->getPointerAddressSpace() == 0; + + // Store to null is undefined. 
+ if (StoreInst *SI = dyn_cast<StoreInst>(Use)) + return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; + } + return false; +} + +/// If BB has an incoming value that will always trigger undefined behavior +/// (eg. null pointer derefence), remove the branch leading here. +static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { + for (BasicBlock::iterator i = BB->begin(); + PHINode *PHI = dyn_cast<PHINode>(i); ++i) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) { + TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator(); + IRBuilder<> Builder(T); + if (BranchInst *BI = dyn_cast<BranchInst>(T)) { + BB->removePredecessor(PHI->getIncomingBlock(i)); + // Turn uncoditional branches into unreachables and remove the dead + // destination from conditional branches. + if (BI->isUnconditional()) + Builder.CreateUnreachable(); + else + Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : + BI->getSuccessor(0)); + BI->eraseFromParent(); + return true; + } + // TODO: SwitchInst. + } + + return false; +} + bool SimplifyCFGOpt::run(BasicBlock *BB) { bool Changed = false; - Function *M = BB->getParent(); assert(BB && BB->getParent() && "Block not embedded in function!"); assert(BB->getTerminator() && "Degenerate basic block encountered!"); - assert(&BB->getParent()->getEntryBlock() != BB && - "Can't Simplify entry block!"); - // Remove basic blocks that have no predecessors... or that just have themself - // as a predecessor. These are unreachable. - if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) { + // Remove basic blocks that have no predecessors (except the entry block)... + // or that just have themself as a predecessor. These are unreachable. 
+ if ((pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) || + BB->getSinglePredecessor() == BB) { DEBUG(dbgs() << "Removing BB: \n" << *BB); DeleteDeadBlock(BB); return true; @@ -1731,268 +2853,13 @@ // Check to see if we can constant propagate this terminator instruction // away... - Changed |= ConstantFoldTerminator(BB); + Changed |= ConstantFoldTerminator(BB, true); // Check for and eliminate duplicate PHI nodes in this block. Changed |= EliminateDuplicatePHINodes(BB); - // If there is a trivial two-entry PHI node in this basic block, and we can - // eliminate it, do so now. - if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) - if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN); - - // If this is a returning block with only PHI nodes in it, fold the return - // instruction into any unconditional branch predecessors. - // - // If any predecessor is a conditional branch that just selects among - // different return values, fold the replace the branch/return with a select - // and return. - if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { - if (isTerminatorFirstRelevantInsn(BB, BB->getTerminator())) { - // Find predecessors that end with branches. - SmallVector<BasicBlock*, 8> UncondBranchPreds; - SmallVector<BranchInst*, 8> CondBranchPreds; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - TerminatorInst *PTI = P->getTerminator(); - if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { - if (BI->isUnconditional()) - UncondBranchPreds.push_back(P); - else - CondBranchPreds.push_back(BI); - } - } - - // If we found some, do the transformation! 
- if (!UncondBranchPreds.empty()) { - while (!UncondBranchPreds.empty()) { - BasicBlock *Pred = UncondBranchPreds.pop_back_val(); - DEBUG(dbgs() << "FOLDING: " << *BB - << "INTO UNCOND BRANCH PRED: " << *Pred); - Instruction *UncondBranch = Pred->getTerminator(); - // Clone the return and add it to the end of the predecessor. - Instruction *NewRet = RI->clone(); - Pred->getInstList().push_back(NewRet); - - // If the return instruction returns a value, and if the value was a - // PHI node in "BB", propagate the right value into the return. - for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); - i != e; ++i) - if (PHINode *PN = dyn_cast<PHINode>(*i)) - if (PN->getParent() == BB) - *i = PN->getIncomingValueForBlock(Pred); - - // Update any PHI nodes in the returning block to realize that we no - // longer branch to them. - BB->removePredecessor(Pred); - Pred->getInstList().erase(UncondBranch); - } - - // If we eliminated all predecessors of the block, delete the block now. - if (pred_begin(BB) == pred_end(BB)) - // We know there are no successors, so just nuke the block. - M->getBasicBlockList().erase(BB); - - return true; - } - - // Check out all of the conditional branches going to this return - // instruction. If any of them just select between returns, change the - // branch itself into a select/return pair. - while (!CondBranchPreds.empty()) { - BranchInst *BI = CondBranchPreds.pop_back_val(); - - // Check to see if the non-BB successor is also a return block. - if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && - isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && - SimplifyCondBranchToTwoReturns(BI)) - return true; - } - } - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { - if (isValueEqualityComparison(SI)) { - // If we only have one predecessor, and if it is a branch on this value, - // see if that predecessor totally determines the outcome of this switch. 
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred)) - return SimplifyCFG(BB) || 1; - - // If the block only contains the switch, see if we can fold the block - // away into any preds. - BasicBlock::iterator BBI = BB->begin(); - // Ignore dbg intrinsics. - while (ISA_DEBUG_INFO_INTRINSIC(BBI)) - ++BBI; - if (SI == &*BBI) - if (FoldValueComparisonIntoPredecessors(SI)) - return SimplifyCFG(BB) || 1; - } - } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { - if (BI->isUnconditional()) { - BasicBlock::iterator BBI = BB->getFirstNonPHI(); - - // Ignore dbg intrinsics. - while (ISA_DEBUG_INFO_INTRINSIC(BBI)) - ++BBI; - if (BBI->isTerminator()) // Terminator is the only non-phi instruction! - if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) - return true; - - } else { // Conditional branch - if (isValueEqualityComparison(BI)) { - // If we only have one predecessor, and if it is a branch on this value, - // see if that predecessor totally determines the outcome of this - // switch. - if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred)) - return SimplifyCFG(BB) | true; - - // This block must be empty, except for the setcond inst, if it exists. - // Ignore dbg intrinsics. - BasicBlock::iterator I = BB->begin(); - // Ignore dbg intrinsics. - while (ISA_DEBUG_INFO_INTRINSIC(I)) - ++I; - if (&*I == BI) { - if (FoldValueComparisonIntoPredecessors(BI)) - return SimplifyCFG(BB) | true; - } else if (&*I == cast<Instruction>(BI->getCondition())){ - ++I; - // Ignore dbg intrinsics. - while (ISA_DEBUG_INFO_INTRINSIC(I)) - ++I; - if(&*I == BI) { - if (FoldValueComparisonIntoPredecessors(BI)) - return SimplifyCFG(BB) | true; - } - } - } - - // If this is a branch on a phi node in the current block, thread control - // through this block if any PHI node entries are constants. 
- if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) - if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI)) - return SimplifyCFG(BB) | true; - - // If this basic block is ONLY a setcc and a branch, and if a predecessor - // branches to us and one of our successors, fold the setcc into the - // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB) | true; - - - // Scan predecessor blocks for conditional branches. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) - if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB) | true; - } - } else if (isa<UnreachableInst>(BB->getTerminator())) { - // If there are any instructions immediately before the unreachable that can - // be removed, do so. - Instruction *Unreachable = BB->getTerminator(); - while (Unreachable != BB->begin()) { - BasicBlock::iterator BBI = Unreachable; - --BBI; - // Do not delete instructions that can have side effects, like calls - // (which may never return) and volatile loads and stores. - if (isa<CallInst>(BBI) && !ISA_DEBUG_INFO_INTRINSIC(BBI)) break; - - if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) - if (SI->isVolatile()) - break; - - if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) - if (LI->isVolatile()) - break; - - // Delete this instruction - BB->getInstList().erase(BBI); - Changed = true; - } - - // If the unreachable instruction is the first in the block, take a gander - // at all of the predecessors of this instruction, and simplify them. 
- if (&BB->front() == Unreachable) { - SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - TerminatorInst *TI = Preds[i]->getTerminator(); - - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - if (BI->isUnconditional()) { - if (BI->getSuccessor(0) == BB) { - new UnreachableInst(TI->getContext(), TI); - TI->eraseFromParent(); - Changed = true; - } - } else { - if (BI->getSuccessor(0) == BB) { - BranchInst::Create(BI->getSuccessor(1), BI); - EraseTerminatorInstAndDCECond(BI); - } else if (BI->getSuccessor(1) == BB) { - BranchInst::Create(BI->getSuccessor(0), BI); - EraseTerminatorInstAndDCECond(BI); - Changed = true; - } - } - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - if (SI->getSuccessor(i) == BB) { - BB->removePredecessor(SI->getParent()); - SI->removeCase(i); - --i; --e; - Changed = true; - } - // If the default value is unreachable, figure out the most popular - // destination and make it the default. - if (SI->getSuccessor(0) == BB) { - std::map<BasicBlock*, unsigned> Popularity; - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - Popularity[SI->getSuccessor(i)]++; - - // Find the most popular block. - unsigned MaxPop = 0; - BasicBlock *MaxBlock = 0; - for (std::map<BasicBlock*, unsigned>::iterator - I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second > MaxPop) { - MaxPop = I->second; - MaxBlock = I->first; - } - } - if (MaxBlock) { - // Make this the new default, allowing us to delete any explicit - // edges to it. - SI->setSuccessor(0, MaxBlock); - Changed = true; - - // If MaxBlock has phinodes in it, remove MaxPop-1 entries from - // it. 
- if (isa<PHINode>(MaxBlock->begin())) - for (unsigned i = 0; i != MaxPop-1; ++i) - MaxBlock->removePredecessor(SI->getParent()); - - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - if (SI->getSuccessor(i) == MaxBlock) { - SI->removeCase(i); - --i; --e; - } - } - } - } - } - - // If this block is now dead, remove it. - if (pred_begin(BB) == pred_end(BB)) { - // We know there are no successors, so just nuke the block. - M->getBasicBlockList().erase(BB); - return true; - } - } - } + // Check for and remove branches that will always cause undefined behavior. + Changed |= removeUndefIntroducingPredecessor(BB); // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and @@ -2000,106 +2867,37 @@ // if (MergeBlockIntoPredecessor(BB)) return true; - - // Otherwise, if this block only has a single predecessor, and if that block - // is a conditional branch, see if we can hoist any code from this block up - // into our predecessor. - pred_iterator PI(pred_begin(BB)), PE(pred_end(BB)); - BasicBlock *OnlyPred = *PI++; - for (; PI != PE; ++PI) // Search all predecessors, see if they are all same - if (*PI != OnlyPred) { - OnlyPred = 0; // There are multiple different predecessors... - break; - } - if (OnlyPred) - if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator())) - if (BI->isConditional()) { - // Get the other block. - BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB); - PI = pred_begin(OtherBB); - ++PI; - - if (PI == pred_end(OtherBB)) { - // We have a conditional branch to two blocks that are only reachable - // from the condbr. We know that the condbr dominates the two blocks, - // so see if there is any identical code in the "then" and "else" - // blocks. If so, we can hoist it up to the branching block. 
- Changed |= HoistThenElseCodeToIf(BI); - } else { - BasicBlock* OnlySucc = NULL; - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); - SI != SE; ++SI) { - if (!OnlySucc) - OnlySucc = *SI; - else if (*SI != OnlySucc) { - OnlySucc = 0; // There are multiple distinct successors! - break; - } - } + IRBuilder<> Builder(BB); - if (OnlySucc == OtherBB) { - // If BB's only successor is the other successor of the predecessor, - // i.e. a triangle, see if we can hoist any code from this block up - // to the "if" block. - Changed |= SpeculativelyExecuteBB(BI, BB); - } - } - } + // If there is a trivial two-entry PHI node in this basic block, and we can + // eliminate it, do so now. + if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) + if (PN->getNumIncomingValues() == 2) + Changed |= FoldTwoEntryPHINode(PN, TD); - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator())) - // Change br (X == 0 | X == 1), T, F into a switch instruction. - if (BI->isConditional() && isa<Instruction>(BI->getCondition())) { - Instruction *Cond = cast<Instruction>(BI->getCondition()); - // If this is a bunch of seteq's or'd together, or if it's a bunch of - // 'setne's and'ed together, collect them. - Value *CompVal = 0; - std::vector<ConstantInt*> Values; - bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values); - if (CompVal) { - // There might be duplicate constants in the list, which the switch - // instruction can't handle, remove them now. - std::sort(Values.begin(), Values.end(), ConstantIntOrdering()); - Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - - // Figure out which block is which destination. - BasicBlock *DefaultBB = BI->getSuccessor(1); - BasicBlock *EdgeBB = BI->getSuccessor(0); - if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); - - // Convert pointer to int before we switch. 
- if (CompVal->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without TargetData"); - CompVal = new PtrToIntInst(CompVal, - TD->getIntPtrType(CompVal->getContext()), - BI); - } - - // Create the new switch instruction now. - SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, - Values.size(), BI); - - // Add all of the 'cases' to the switch instruction. - for (unsigned i = 0, e = Values.size(); i != e; ++i) - New->addCase(Values[i], EdgeBB); - - // We added edges from PI to the EdgeBB. As such, if there were any - // PHI nodes in EdgeBB, they need entries to be added corresponding to - // the number of edges added. - for (BasicBlock::iterator BBI = EdgeBB->begin(); - isa<PHINode>(BBI); ++BBI) { - PHINode *PN = cast<PHINode>(BBI); - Value *InVal = PN->getIncomingValueForBlock(*PI); - for (unsigned i = 0, e = Values.size()-1; i != e; ++i) - PN->addIncoming(InVal, *PI); - } - - // Erase the old branch instruction. - EraseTerminatorInstAndDCECond(BI); - return true; - } - } + Builder.SetInsertPoint(BB->getTerminator()); + if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (BI->isUnconditional()) { + if (SimplifyUncondBranch(BI, Builder)) return true; + } else { + if (SimplifyCondBranch(BI, Builder)) return true; + } + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + if (SimplifyResume(RI, Builder)) return true; + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + if (SimplifyReturn(RI, Builder)) return true; + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + if (SimplifySwitch(SI, Builder)) return true; + } else if (UnreachableInst *UI = + dyn_cast<UnreachableInst>(BB->getTerminator())) { + if (SimplifyUnreachable(UI)) return true; + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + if (SimplifyUnwind(UI, Builder)) return true; + } else if (IndirectBrInst *IBI = + dyn_cast<IndirectBrInst>(BB->getTerminator())) { + if 
(SimplifyIndirectBr(IBI)) return true; + } return Changed; } @@ -2109,8 +2907,6 @@ /// eliminates unreachable basic blocks, and does other "peephole" optimization /// of the CFG. It returns true if a modification was made. /// -/// WARNING: The entry node of a function may not be simplified. -/// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) { return SimplifyCFGOpt(TD).run(BB); }
diff --git a/src/LLVM/lib/Transforms/Utils/SimplifyIndVar.cpp b/src/LLVM/lib/Transforms/Utils/SimplifyIndVar.cpp new file mode 100644 index 0000000..76289c0 --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -0,0 +1,432 @@ +//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements induction variable simplification. It does +// not define any actual pass or policy, but provides a single function to +// simplify a loop's induction variables based on ScalarEvolution. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "indvars" + +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); +STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); + +namespace { + /// SimplifyIndvar - This is a utility for simplifying induction variables + /// based on ScalarEvolution. It is the primary instrument of the + /// IndvarSimplify pass, but it may also be directly invoked to clean up after + /// other loop passes that preserve SCEV. NOTE(review): member DT is neither initialized nor read in this file.
+ class SimplifyIndvar { + Loop *L; + LoopInfo *LI; + DominatorTree *DT; + ScalarEvolution *SE; + IVUsers *IU; // NULL for DisableIVRewrite + const TargetData *TD; // May be NULL + + SmallVectorImpl<WeakVH> &DeadInsts; + + bool Changed; + + public: + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) : + L(Loop), + LI(LPM->getAnalysisIfAvailable<LoopInfo>()), + SE(SE), + IU(IVU), + TD(LPM->getAnalysisIfAvailable<TargetData>()), + DeadInsts(Dead), + Changed(false) { + assert(LI && "IV simplification requires LoopInfo"); + } + + bool hasChanged() const { return Changed; } + + /// Iteratively perform simplification on a worklist of users of the + /// specified induction variable. This is the top-level driver that applies + /// all simplifications to users of an IV. + void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL); + + Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); + + bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); + void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); + void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool IsSigned); + }; +} + +/// foldIVUser - Fold an IV operand into its use. This removes increments of an +/// aligned IV when used by an instruction that ignores the low bits. +/// +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +/// +/// Return the operand of IVOperand for this induction variable if IVOperand can +/// be folded (in case more folding opportunities have been exposed). +/// Otherwise return null. +Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { + Value *IVSrc = 0; + unsigned OperIdx = 0; + const SCEV *FoldedExpr = 0; + switch (UseInst->getOpcode()) { + default: + return 0; + case Instruction::UDiv: + case Instruction::LShr: + // We're only interested in the case where we know something about + // the numerator and have a constant denominator.
+ if (IVOperand != UseInst->getOperand(OperIdx) || + !isa<ConstantInt>(UseInst->getOperand(1))) + return 0; + + // Attempt to fold a binary operator with constant operand. + // e.g. ((I + 1) >> 2) => I >> 2 + if (IVOperand->getNumOperands() != 2 || + !isa<ConstantInt>(IVOperand->getOperand(1))) + return 0; + + IVSrc = IVOperand->getOperand(0); + // IVSrc must be the (SCEVable) IV, since the other operand is const. + assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand"); + + ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1)); + if (UseInst->getOpcode() == Instruction::LShr) { + // Get a constant for the divisor. See createSCEV. + uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); + if (D->getValue().uge(BitWidth)) + return 0; + + D = ConstantInt::get(UseInst->getContext(), + APInt(BitWidth, 1).shl(D->getZExtValue())); + } + FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D)); + } + // We have something that might fold its operand. Compare SCEVs. + if (!SE->isSCEVable(UseInst->getType())) + return 0; + + // Bypass the operand if SCEV can prove it has no effect. + if (SE->getSCEV(UseInst) != FoldedExpr) + return 0; + + DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand + << " -> " << *UseInst << '\n'); + + UseInst->setOperand(OperIdx, IVSrc); + assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); + + ++NumElimOperand; + Changed = true; + if (IVOperand->use_empty()) + DeadInsts.push_back(IVOperand); + return IVSrc; +} + +/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless +/// comparisons against an induction variable.
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands. + const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); + const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // If the condition is always true or always false, replace it with + // a constant value. + if (SE->isKnownPredicate(Pred, S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); + else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); + else + return; + + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + ++NumElimCmp; + Changed = true; + DeadInsts.push_back(ICmp); +} + +/// eliminateIVRemainder - SimplifyIVUsers helper for eliminating useless +/// remainder operations operating on an induction variable. +void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, + Value *IVOperand, + bool IsSigned) { + // We're only interested in the case where we know something about + // the numerator. + if (IVOperand != Rem->getOperand(0)) + return; + + // Get the SCEVs for the Rem operands. + const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n).
+ if ((!IsSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + if (IsSigned && !SE->isKnownNonNegative(LessOne)) + return; + + if (!SE->isKnownPredicate(IsSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) + return; + + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1)); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } + + // Inform IVUsers about the new users. + if (IU) { + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I); + } + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.push_back(Rem); +} + +/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has +/// no observable side-effect given the range of IV values. +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, + Instruction *IVOperand) { + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + eliminateIVComparison(ICmp, IVOperand); + return true; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + eliminateIVRemainder(Rem, IVOperand, IsSigned); + return true; + } + } + + // Eliminate any operation that SCEV can prove is an identity function.
+ if (!SE->isSCEVable(UseInst->getType()) || + (UseInst->getType() != IVOperand->getType()) || + (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + return false; + + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); + + UseInst->replaceAllUsesWith(IVOperand); + ++NumElimIdentity; + Changed = true; + DeadInsts.push_back(UseInst); + return true; +} + +/// pushIVUsers - Add all uses of Def to the current IV's worklist. +/// +static void pushIVUsers( + Instruction *Def, + SmallPtrSet<Instruction*,16> &Simplified, + SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { + + for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + // Avoid infinite or exponential worklist processing. + // Also ensure unique worklist users. + // If Def is a LoopPhi, it may not be in the Simplified set, so check for + // self edges first. + if (User != Def && Simplified.insert(User)) + SimpleIVUsers.push_back(std::make_pair(User, Def)); + } +} + +/// isSimpleIVUser - Return true if this instruction generates a simple SCEV +/// expression in terms of that IV. +/// +/// This is similar to IVUsers' isInteresting() but processes each instruction +/// non-recursively when the operand is already known to be a simpleIVUser. +/// +static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + // Only consider affine recurrences. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); + if (AR && AR->getLoop() == L) + return true; + + return false; +} + +/// simplifyUsers - Iteratively perform simplification on a worklist of users +/// of the specified induction variable. Each successive simplification may push +/// more users which may themselves be candidates for simplification.
+/// +/// This algorithm does not require IVUsers analysis. Instead, it simplifies +/// instructions in-place during analysis. Rather than rewriting induction +/// variables bottom-up from their users, it transforms a chain of IVUsers +/// top-down, updating the IR only when it encounters a clear optimization +/// opportunity. +/// +/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. +/// +void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { + if (!SE->isSCEVable(CurrIV->getType())) + return; + + // Instructions processed by SimplifyIndvar for CurrIV. + SmallPtrSet<Instruction*,16> Simplified; + + // Use-def pairs of IV users waiting to be processed for CurrIV. + SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; + + // Push users of the current LoopPhi. In rare cases, pushIVUsers may be + // called multiple times for the same LoopPhi. This is the proper thing to + // do for loop header phis that use each other. + pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + + while (!SimpleIVUsers.empty()) { + std::pair<Instruction*, Instruction*> UseOper = + SimpleIVUsers.pop_back_val(); + // Bypass back edges to avoid extra work.
+ if (UseOper.first == CurrIV) continue; + + Instruction *IVOperand = UseOper.second; + for (unsigned N = 0; IVOperand; ++N) { + assert(N <= Simplified.size() && "runaway iteration"); + + Value *NewOper = foldIVUser(UseOper.first, IVOperand); + if (!NewOper) + break; // done folding + IVOperand = dyn_cast<Instruction>(NewOper); + } + if (!IVOperand) + continue; + + if (eliminateIVUser(UseOper.first, IVOperand)) { + pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + continue; + } + CastInst *Cast = dyn_cast<CastInst>(UseOper.first); + if (V && Cast) { + V->visitCast(Cast); + continue; + } + if (isSimpleIVUser(UseOper.first, L, SE)) { + pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); + } + } +} + +namespace llvm { + +/// simplifyUsersOfIV - Simplify instructions that use this induction variable +/// by using ScalarEvolution to analyze the IV's recurrence. +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead, IVVisitor *V) +{ + LoopInfo *LI = &LPM->getAnalysis<LoopInfo>(); + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead); + SIV.simplifyUsers(CurrIV, V); + return SIV.hasChanged(); +} + +/// simplifyLoopIVs - Simplify users of induction variables within this +/// loop. This does not actually change or add IVs. +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead) { + bool Changed = false; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead); + } + return Changed; +} + +/// simplifyIVUsers - Perform simplification on instructions recorded by the +/// IVUsers pass. +/// +/// This is the old approach to IV simplification to be replaced by +/// SimplifyLoopIVs.
+bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead) { + SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead); + + // Each round of simplification involves a round of eliminating operations + // followed by a round of widening IVs. A single IVUsers worklist is used + // across all rounds. The inner loop advances the user. If widening exposes + // more uses, then another pass through the outer loop is triggered. + for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { + Instruction *UseInst = I->getUser(); + Value *IVOperand = I->getOperandValToReplace(); + + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + SIV.eliminateIVComparison(ICmp, IVOperand); + continue; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned); + continue; + } + } + } + return SIV.hasChanged(); +} + +} // namespace llvm
diff --git a/src/LLVM/lib/Transforms/Utils/SimplifyInstructions.cpp b/src/LLVM/lib/Transforms/Utils/SimplifyInstructions.cpp new file mode 100644 index 0000000..ac005f9 --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -0,0 +1,94 @@
+//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification. If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions removed");
+
+namespace {
+  struct InstSimplifier : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    InstSimplifier() : FunctionPass(ID) {
+      initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG(); // the only declaration made; nothing is required
+    }
+
+    /// runOnFunction - Remove instructions that simplify.
+    bool runOnFunction(Function &F) {
+      // Both analyses are fetched only if available.
+      // NOTE(review): presumably null when absent - confirm.
+      const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+      const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+      // Two sets used alternately: one is read this round while the other
+      // collects work for the next round (swapped at the end of each round).
+      SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+      bool Changed = false;
+
+      do {
+        for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+             DE = df_end(&F.getEntryBlock()); DI != DE; ++DI)
+          for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) {
+            Instruction *I = BI++; // advance first; I may be deleted below
+            // The first time through the loop ToSimplify is empty and we try to
+            // simplify all instructions. On later iterations ToSimplify is not
+            // empty and we only bother simplifying instructions that are in it.
+            if (!ToSimplify->empty() && !ToSimplify->count(I))
+              continue;
+            // Don't waste time simplifying unused instructions.
+            if (!I->use_empty())
+              if (Value *V = SimplifyInstruction(I, TD, DT)) {
+                // Mark all uses for resimplification next time round the loop.
+                for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+                     UI != UE; ++UI)
+                  Next->insert(cast<Instruction>(*UI));
+                I->replaceAllUsesWith(V); // users were queued above, before this
+                ++NumSimplified;
+                Changed = true;
+              }
+            // Reached for every I that was not skipped, simplified or not.
+            Changed |= RecursivelyDeleteTriviallyDeadInstructions(I);
+          }
+
+        // Place the list of instructions to simplify on the next loop iteration
+        // into ToSimplify.
+        std::swap(ToSimplify, Next);
+        Next->clear();
+      } while (!ToSimplify->empty()); // stop once a round queues nothing
+
+      return Changed;
+    }
+  };
+}
+
+char InstSimplifier::ID = 0;
+INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
+                false, false)
+char &llvm::InstructionSimplifierID = InstSimplifier::ID;
+
+// Public interface to the simplify instructions pass.
+FunctionPass *llvm::createInstructionSimplifierPass() {
+  return new InstSimplifier(); // ownership passes to the caller
+}
diff --git a/src/LLVM/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/src/LLVM/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 0a691a7..46d4ada 100644 --- a/src/LLVM/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/src/LLVM/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -25,7 +25,7 @@ char UnifyFunctionExitNodes::ID = 0; INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn", - "Unify function exit nodes", false, false); + "Unify function exit nodes", false, false) Pass *llvm::createUnifyFunctionExitNodesPass() { return new UnifyFunctionExitNodes(); @@ -50,13 +50,33 @@ // return. // std::vector<BasicBlock*> ReturningBlocks; + std::vector<BasicBlock*> UnwindingBlocks; std::vector<BasicBlock*> UnreachableBlocks; for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) if (isa<ReturnInst>(I->getTerminator())) ReturningBlocks.push_back(I); + else if (isa<UnwindInst>(I->getTerminator())) + UnwindingBlocks.push_back(I); else if (isa<UnreachableInst>(I->getTerminator())) UnreachableBlocks.push_back(I); + // Handle unwinding blocks first. + if (UnwindingBlocks.empty()) { + UnwindBlock = 0; + } else if (UnwindingBlocks.size() == 1) { + UnwindBlock = UnwindingBlocks.front(); + } else { + UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F); + new UnwindInst(F.getContext(), UnwindBlock); + + for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(), + E = UnwindingBlocks.end(); I != E; ++I) { + BasicBlock *BB = *I; + BB->getInstList().pop_back(); // Remove the unwind insn + BranchInst::Create(UnwindBlock, BB); + } + } + // Then unreachable blocks. if (UnreachableBlocks.empty()) { UnreachableBlock = 0; @@ -64,7 +84,7 @@ UnreachableBlock = UnreachableBlocks.front(); } else { UnreachableBlock = BasicBlock::Create(F.getContext(), - &F); + "UnifiedUnreachableBlock", &F); new UnreachableInst(F.getContext(), UnreachableBlock); for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(), @@ -89,14 +109,15 @@ // instructions into unconditional branches. 
// BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), - &F); + "UnifiedReturnBlock", &F); PHINode *PN = 0; if (F.getReturnType()->isVoidTy()) { ReturnInst::Create(F.getContext(), NULL, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... - PN = PHINode::Create(F.getReturnType()); + PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal"); NewRetBlock->getInstList().push_back(PN); ReturnInst::Create(F.getContext(), PN, NewRetBlock); }
diff --git a/src/LLVM/lib/Transforms/Utils/Utils.cpp b/src/LLVM/lib/Transforms/Utils/Utils.cpp new file mode 100644 index 0000000..24e8c8f --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,37 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+  // One initializer call per pass, all against the same Registry.
+  initializeBreakCriticalEdgesPass(Registry);
+  initializeInstNamerPass(Registry);
+  initializeLCSSAPass(Registry);
+  initializeLoopSimplifyPass(Registry);
+  initializeLowerInvokePass(Registry);
+  initializeLowerSwitchPass(Registry);
+  initializePromotePassPass(Registry);
+  initializeUnifyFunctionExitNodesPass(Registry);
+  initializeInstSimplifierPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtils.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+  initializeTransformUtils(*unwrap(R)); // unwrap the C handle, then forward
+}
diff --git a/src/LLVM/lib/Transforms/Utils/ValueMapper.cpp b/src/LLVM/lib/Transforms/Utils/ValueMapper.cpp new file mode 100644 index 0000000..fc2538d --- /dev/null +++ b/src/LLVM/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,200 @@ +//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MapValue function, which is shared by various parts of +// the lib/Transforms/Utils library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Metadata.h" +using namespace llvm; + +// Out of line method to get vtable etc for class. +void ValueMapTypeRemapper::Anchor() {} + +Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper) { + ValueToValueMapTy::iterator I = VM.find(V); + + // If the value already exists in the map, use it. + if (I != VM.end() && I->second) return I->second; + + // Global values do not need to be seeded into the VM if they + // are using the identity mapping. + if (isa<GlobalValue>(V) || isa<MDString>(V)) + return VM[V] = const_cast<Value*>(V); + + if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { + // Inline asm may need *type* remapping. + FunctionType *NewTy = IA->getFunctionType(); + if (TypeMapper) { + NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy)); + + if (NewTy != IA->getFunctionType()) + V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), + IA->hasSideEffects(), IA->isAlignStack()); + } + + return VM[V] = const_cast<Value*>(V); + } + + + if (const MDNode *MD = dyn_cast<MDNode>(V)) { + // If this is a module-level metadata and we know that nothing at the module + // level is changing, then use an identity mapping. 
+ if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges)) + return VM[V] = const_cast<Value*>(V); + + // Create a dummy node in case we have a metadata cycle. + MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>()); + VM[V] = Dummy; + + // Check all operands to see if any need to be remapped. + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { + Value *OP = MD->getOperand(i); + if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue; + + // Ok, at least one operand needs remapping. + SmallVector<Value*, 4> Elts; + Elts.reserve(MD->getNumOperands()); + for (i = 0; i != e; ++i) { + Value *Op = MD->getOperand(i); + Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0); + } + MDNode *NewMD = MDNode::get(V->getContext(), Elts); + Dummy->replaceAllUsesWith(NewMD); + VM[V] = NewMD; + MDNode::deleteTemporary(Dummy); + return NewMD; + } + + VM[V] = const_cast<Value*>(V); + MDNode::deleteTemporary(Dummy); + + // No operands needed remapping. Use an identity mapping. + return const_cast<Value*>(V); + } + + // Okay, this either must be a constant (which may or may not be mappable) or + // is something that is not in the mapping table. + Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); + if (C == 0) + return 0; + + if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { + Function *F = + cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper)); + BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM, + Flags, TypeMapper)); + return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock()); + } + + // Otherwise, we have some other constant to remap. Start by checking to see + // if all operands have an identity remapping. 
+ unsigned OpNo = 0, NumOperands = C->getNumOperands(); + Value *Mapped = 0; + for (; OpNo != NumOperands; ++OpNo) { + Value *Op = C->getOperand(OpNo); + Mapped = MapValue(Op, VM, Flags, TypeMapper); + if (Mapped != C) break; + } + + // See if the type mapper wants to remap the type as well. + Type *NewTy = C->getType(); + if (TypeMapper) + NewTy = TypeMapper->remapType(NewTy); + + // If the result type and all operands match up, then just insert an identity + // mapping. + if (OpNo == NumOperands && NewTy == C->getType()) + return VM[V] = C; + + // Okay, we need to create a new constant. We've already processed some or + // all of the operands, set them all up now. + SmallVector<Constant*, 8> Ops; + Ops.reserve(NumOperands); + for (unsigned j = 0; j != OpNo; ++j) + Ops.push_back(cast<Constant>(C->getOperand(j))); + + // If one of the operands mismatch, push it and the other mapped operands. + if (OpNo != NumOperands) { + Ops.push_back(cast<Constant>(Mapped)); + + // Map the rest of the operands that aren't processed yet. + for (++OpNo; OpNo != NumOperands; ++OpNo) + Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM, + Flags, TypeMapper)); + } + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return VM[V] = CE->getWithOperands(Ops, NewTy); + if (isa<ConstantArray>(C)) + return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); + if (isa<ConstantStruct>(C)) + return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); + if (isa<ConstantVector>(C)) + return VM[V] = ConstantVector::get(Ops); + // If this is a no-operand constant, it must be because the type was remapped. 
+ if (isa<UndefValue>(C)) + return VM[V] = UndefValue::get(NewTy); + if (isa<ConstantAggregateZero>(C)) + return VM[V] = ConstantAggregateZero::get(NewTy); + assert(isa<ConstantPointerNull>(C)); + return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); +} + +/// RemapInstruction - Convert the instruction operands from referencing the +/// current values into those specified by VMap. +/// +void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, + RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){ + // Remap operands. + for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { + Value *V = MapValue(*op, VMap, Flags, TypeMapper); + // If we aren't ignoring missing entries, assert that something happened. + if (V != 0) + *op = V; + else + assert((Flags & RF_IgnoreMissingEntries) && + "Referenced value not in value map!"); + } + + // Remap phi nodes' incoming blocks. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags); + // If we aren't ignoring missing entries, assert that something happened. + if (V != 0) + PN->setIncomingBlock(i, cast<BasicBlock>(V)); + else + assert((Flags & RF_IgnoreMissingEntries) && + "Referenced block not in value map!"); + } + } + + // Remap attached metadata. + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + I->getAllMetadata(MDs); + for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator + MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { + MDNode *Old = MI->second; + MDNode *New = MapValue(Old, VMap, Flags, TypeMapper); + if (New != Old) + I->setMetadata(MI->first, New); + } + + // If the instruction's type is being remapped, do so now. + if (TypeMapper) + I->mutateType(TypeMapper->remapType(I->getType())); +}