Merge commit 'dc209330cece9e680b7ae0677835e513f91c99f2' into cloud-android-current-release
Merges in changes from the master branch. Includes eliminating
generating MMX instructions, emulating Subzero intrinsics for ARM,
and floating-point filtering extension using glHint.
Change-Id: I98eadb7c8b48284f39b8f2933f31427f38bb286a
diff --git a/BUILD.gn b/BUILD.gn
index 52a78e8..7530b91 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -30,46 +30,28 @@
"_CRT_SECURE_NO_DEPRECATE",
"NOMINMAX",
"_WINDLL",
+ "NO_SANITIZE_FUNCTION=",
+ "ANGLE_DISABLE_TRACE",
]
-
- if (is_debug) {
- cflags += [ "/RTC1" ] # Run-Time Error Checks
- } else {
- defines += [ "ANGLE_DISABLE_TRACE" ]
- }
} else {
cflags = [
"-std=c++11",
- "-Wall",
"-fno-exceptions",
"-fno-operator-names",
+ "-ffunction-sections",
+ "-fdata-sections",
+ "-fomit-frame-pointer",
+ "-Os",
]
defines += [
"__STDC_CONSTANT_MACROS",
"__STDC_LIMIT_MACROS",
+ "NO_SANITIZE_FUNCTION=__attribute__((no_sanitize(\"function\")))",
+ "ANGLE_DISABLE_TRACE",
+ "NDEBUG",
]
- if (is_debug) {
- cflags += [
- "-g",
- "-g3",
- ]
- } else { # Release
- # All Release builds use function/data sections to make the shared libraries smaller
- cflags += [
- "-ffunction-sections",
- "-fdata-sections",
- "-fomit-frame-pointer",
- "-Os",
- ]
-
- defines += [
- "ANGLE_DISABLE_TRACE",
- "NDEBUG",
- ]
- }
-
if (target_cpu == "x64") { # 64 bit version
cflags += [
"-m64",
@@ -89,11 +71,10 @@
"-Wl,--hash-style=both",
"-Wl,--gc-sections",
]
+
# A bug in the gold linker prevents using ICF on 32-bit (crbug.com/729532)
if (use_gold && target_cpu == "x86") {
- ldflags += [
- "-Wl,--icf=none",
- ]
+ ldflags += [ "-Wl,--icf=none" ]
}
}
}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0841d8d..619111f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,41 +2,6 @@
project(SwiftShader C CXX)
-if(NOT CMAKE_BUILD_TYPE)
- set(CMAKE_BUILD_TYPE "Release" CACHE STRING "The type of build: Debug Release MinSizeRel RelWithDebInfo." FORCE)
-endif()
-set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS Debug Release MinSizeRel RelWithDebInfo)
-
-option(BUILD_EGL "Build the EGL library" 1)
-if(WIN32)
- option(BUILD_GL32 "Build the OpenGL32 library" 1)
-endif()
-option(BUILD_GLESv2 "Build the OpenGL ES 2 library" 1)
-option(BUILD_GLES_CM "Build the OpenGL ES 1.1 library" 1)
-
-option(USE_GROUP_SOURCES "Group the source files in a folder tree for Visual Studio" 1)
-
-option(BUILD_SAMPLES "Build sample programs" 1)
-option(BUILD_TESTS "Build test programs" 1)
-
-set(REACTOR_BACKEND "LLVM" CACHE STRING "JIT compiler back-end used by Reactor")
-set_property(CACHE REACTOR_BACKEND PROPERTY STRINGS LLVM Subzero)
-
-# LLVM disallows calling cmake . from the main LLVM dir, the reason is that
-# it builds header files that could overwrite the orignal ones. Here we
-# want to include LLVM as a subdirectory and even though it wouldn't cause
-# the problem, if cmake . is called from the main dir, the condition that
-# LLVM checkes, "CMAKE_SOURCE_DIR == CMAKE_BINARY_DIR" will be true. So we
-# disallow it ourselves too to. In addition if there are remining CMakeFiles
-# and CMakeCache in the directory, cmake .. from a subdirectory will still
-# try to build from the main directory so we instruct users to delete these
-# files when they get the error.
-if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)
- message(FATAL_ERROR "In source builds are not allowed by LLVM, please create a build/ directory and build from there. You may have to delete the CMakeCache.txt file and CMakeFiles directory that are next to the CMakeLists.txt.")
-endif()
-
-set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-
###########################################################
# Detect system
###########################################################
@@ -66,6 +31,51 @@
set(CMAKE_MACOSX_RPATH ON)
###########################################################
+# Options
+###########################################################
+
+if(NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE "Release" CACHE STRING "The type of build: Debug Release MinSizeRel RelWithDebInfo." FORCE)
+endif()
+set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS Debug Release MinSizeRel RelWithDebInfo)
+
+option(BUILD_EGL "Build the EGL library" 1)
+if(WIN32)
+ option(BUILD_GL32 "Build the OpenGL32 library" 1)
+endif()
+option(BUILD_GLESv2 "Build the OpenGL ES 2 library" 1)
+option(BUILD_GLES_CM "Build the OpenGL ES 1.1 library" 1)
+
+option(USE_GROUP_SOURCES "Group the source files in a folder tree for Visual Studio" 1)
+
+option(BUILD_SAMPLES "Build sample programs" 1)
+option(BUILD_TESTS "Build test programs" 1)
+
+if(ARCH STREQUAL "arm")  # The default JIT back-end depends on the detected architecture
+ set(DEFAULT_REACTOR_BACKEND "Subzero")
+else()
+ set(DEFAULT_REACTOR_BACKEND "LLVM")
+endif()
+
+set(REACTOR_BACKEND "${DEFAULT_REACTOR_BACKEND}" CACHE STRING "JIT compiler back-end used by Reactor")  # Must expand the variable; a bare name would be cached as that literal text
+set_property(CACHE REACTOR_BACKEND PROPERTY STRINGS LLVM Subzero)
+
+# LLVM disallows calling cmake . from the main LLVM dir, the reason is that
+# it builds header files that could overwrite the original ones. Here we
+# want to include LLVM as a subdirectory and even though it wouldn't cause
+# the problem, if cmake . is called from the main dir, the condition that
+# LLVM checks, "CMAKE_SOURCE_DIR == CMAKE_BINARY_DIR" will be true. So we
+# disallow it ourselves too. In addition, if there are remaining CMakeFiles
+# and CMakeCache in the directory, cmake .. from a subdirectory will still
+# try to build from the main directory so we instruct users to delete these
+# files when they get the error.
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)
+ message(FATAL_ERROR "In source builds are not allowed by LLVM, please create a build/ directory and build from there. You may have to delete the CMakeCache.txt file and CMakeFiles directory that are next to the CMakeLists.txt.")
+endif()
+
+set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+
+###########################################################
# Convenience macros
###########################################################
@@ -854,6 +864,7 @@
INCLUDE_DIRECTORIES "${COMMON_INCLUDE_DIR}"
POSITION_INDEPENDENT_CODE 1
FOLDER "Core"
+ COMPILE_DEFINITIONS "NO_SANITIZE_FUNCTION=;"
)
target_link_libraries(SwiftShader ${OS_LIBS})
@@ -892,7 +903,7 @@
set_target_properties(libEGL PROPERTIES
INCLUDE_DIRECTORIES "${OPENGL_INCLUDE_DIR}"
FOLDER "OpenGL"
- COMPILE_DEFINITIONS "EGL_EGLEXT_PROTOTYPES; EGLAPI=;"
+ COMPILE_DEFINITIONS "EGL_EGLEXT_PROTOTYPES; EGLAPI=; NO_SANITIZE_FUNCTION=;"
PREFIX ""
)
set_target_export_map(libEGL ${SOURCE_DIR}/OpenGL/libEGL)
@@ -922,7 +933,7 @@
set_target_properties(libGLESv2 PROPERTIES
INCLUDE_DIRECTORIES "${OPENGL_INCLUDE_DIR}"
FOLDER "OpenGL"
- COMPILE_DEFINITIONS "GL_GLEXT_PROTOTYPES; GL_API=; GL_APICALL=;"
+ COMPILE_DEFINITIONS "GL_GLEXT_PROTOTYPES; GL_API=; GL_APICALL=; NO_SANITIZE_FUNCTION=;"
PREFIX ""
)
set_target_export_map(libGLESv2 ${SOURCE_DIR}/OpenGL/libGLESv2)
diff --git a/DEPS b/DEPS
deleted file mode 100644
index 43434a2..0000000
--- a/DEPS
+++ /dev/null
@@ -1,16 +0,0 @@
-# This file is used to manage SwiftShader's dependencies in the Chromium src
-# repo. It is used by gclient to determine what version of each dependency to
-# check out, and where.
-
-use_relative_paths = True
-
-vars = {
- 'chromium_git': 'https://chromium.googlesource.com',
- # Current revision of subzero.
- 'subzero_revision': 'fb705a6d55003b2c32772ae49e25b0babcff5acc',
-}
-
-deps = {
- 'third_party/pnacl-subzero':
- Var('chromium_git') + '/native_client/pnacl-subzero@' + Var('subzero_revision'),
-}
diff --git a/README.md b/README.md
index ac2edf2..ff0b9de 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,6 @@
-# SwiftShader [![Build Status](https://travis-ci.org/google/swiftshader.svg?branch=master)](https://travis-ci.org/google/swiftshader) [![Build status](https://ci.appveyor.com/api/projects/status/yrmyvb34j22jg1uj?svg=true)](https://ci.appveyor.com/project/c0d1f1ed/swiftshader)
+# SwiftShader
------------------------------------------------------------------------------------------------------------
-
-Introduction
-------------
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Build Status](https://travis-ci.org/google/swiftshader.svg?branch=master)](https://travis-ci.org/google/swiftshader) [![Build status](https://ci.appveyor.com/api/projects/status/yrmyvb34j22jg1uj?svg=true)](https://ci.appveyor.com/project/c0d1f1ed/swiftshader)
SwiftShader is a high-performance CPU-based implementation of the OpenGL ES and Direct3D 9 graphics APIs<sup>1</sup><sup>2</sup>. Its goal is to provide hardware independence for advanced 3D graphics.
diff --git a/extensions/CHROMIUM_texture_filtering_hint.txt b/extensions/CHROMIUM_texture_filtering_hint.txt
new file mode 100644
index 0000000..38af6e0
--- /dev/null
+++ b/extensions/CHROMIUM_texture_filtering_hint.txt
@@ -0,0 +1,85 @@
+Name
+
+ CHROMIUM_texture_filtering_hint
+
+Name Strings
+
+ GL_CHROMIUM_texture_filtering_hint
+
+Contributors
+
+ Alexis Hetu, Google Inc.
+ Nicolas Capens, Google Inc.
+ Shannon Woods, Google Inc.
+
+Contact
+
+ Alexis Hetu, Google Inc. (sugoi 'at' chromium 'dot' org)
+
+Version
+
+ Last Modified Date: July 18, 2017
+
+Dependencies
+
+ This extension is written against the OpenGL ES 2.0 specification.
+
+ OpenGL ES 2.0 is required.
+
+Overview
+
+ This extension defines a way to request high precision texture filtering
+ using a new value to Hint.
+
+ When this extension is enabled, TEXTURE_FILTERING_HINT_CHROMIUM can be used
+ by the implementation as a means to distinguish between a performance
+ focused implementation, using FASTEST, or a precision focused
+ implementation, using NICEST.
+
+ Like other hints, either option is spec compliant and the behavior of
+ DONT_CARE is implementation specific.
+
+New Tokens
+
+ Accepted by the <pname> parameter of GetIntegerv, GetFloatv and GetBooleanv
+ and by the <target> parameter of Hint:
+
+ TEXTURE_FILTERING_HINT_CHROMIUM 0x8AF0
+
+New Procedures and Functions
+
+ None.
+
+Errors
+
+ None.
+
+New State
+
+ None.
+
+Issues
+
+ 1) When does the hint take effect?
+
+ At the time of the next draw call, and all subsequent draw calls.
+
+ 2) Does the first draw call after the filtering hint is changed use the
+ updated filtering method?
+
+ Yes
+
+ 3) Can I switch it back and forth between every draw call, multiple times
+ during a single frame?
+
+ Yes
+
+ 4) Do program objects which were created before the filtering hint was
+ changed and which contain sampling instructions use the filtering method
+ from when they were created, or the method at the time of draw call?
+
+ At the time of draw call.
+
+Revision History
+
+ 2/7/2014 Documented the extension
diff --git a/include/EGL/eglplatform.h b/include/EGL/eglplatform.h
index 1284089..a3b7234 100644
--- a/include/EGL/eglplatform.h
+++ b/include/EGL/eglplatform.h
@@ -85,8 +85,7 @@
#elif defined(__ANDROID__) || defined(ANDROID)
-#include <android/native_window.h>
-
+struct ANativeWindow;
struct egl_native_pixmap_t;
typedef struct ANativeWindow* EGLNativeWindowType;
diff --git a/src/Android.mk b/src/Android.mk
index eac01d9..ec3fc96 100644
--- a/src/Android.mk
+++ b/src/Android.mk
@@ -108,12 +108,16 @@
-Wno-implicit-exception-spec-mismatch \
-Wno-overloaded-virtual \
-Wno-non-virtual-dtor \
+ -Wno-attributes \
+ -Wno-unknown-attributes \
+ -Wno-unknown-warning-option \
-fno-operator-names \
-msse2 \
-D__STDC_CONSTANT_MACROS \
-D__STDC_LIMIT_MACROS \
-DANDROID_PLATFORM_SDK_VERSION=$(PLATFORM_SDK_VERSION) \
- -std=c++11
+ -std=c++11 \
+ -DNO_SANITIZE_FUNCTION=
ifneq (16,${PLATFORM_SDK_VERSION})
COMMON_CFLAGS += -Xclang -fuse-init-array
diff --git a/src/Common/BUILD.gn b/src/Common/BUILD.gn
index 2fd4885..6b23321 100644
--- a/src/Common/BUILD.gn
+++ b/src/Common/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_common_private_config") {
if (is_win) {
@@ -21,11 +23,10 @@
]
} else {
cflags = [ "-msse2" ]
- defines = [ "LOG_TAG=\"swiftshader_common\"" ]
}
}
-source_set("swiftshader_common") {
+swiftshader_source_set("swiftshader_common") {
sources = [
"CPUID.cpp",
"Configurator.cpp",
@@ -39,5 +40,5 @@
"Timer.cpp",
]
- configs += [ ":swiftshader_common_private_config" ]
+ configs = [ ":swiftshader_common_private_config" ]
}
diff --git a/src/Common/DebugAndroid.hpp b/src/Common/DebugAndroid.hpp
index ac937e0..6dfb61d 100644
--- a/src/Common/DebugAndroid.hpp
+++ b/src/Common/DebugAndroid.hpp
@@ -16,6 +16,7 @@
#define DebugAndroid_hpp
#include <cutils/log.h>
+#include <cassert>
// On Android Virtual Devices we heavily depend on logging, even in
// production builds. We do this because AVDs are components of larger
diff --git a/src/D3D8/Direct3DDevice8.cpp b/src/D3D8/Direct3DDevice8.cpp
index 6294fbb..7f6e769 100644
--- a/src/D3D8/Direct3DDevice8.cpp
+++ b/src/D3D8/Direct3DDevice8.cpp
@@ -365,7 +365,7 @@
for(unsigned int i = 0; i < count; i++)
{
- sw::SliceRect clearRect(rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2, 0);
+ sw::Rect clearRect(rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2);
clearRect.clip(viewport.X, viewport.Y, viewport.X + viewport.Width, viewport.Y + viewport.Height);
diff --git a/src/D3D9/Direct3DDevice9.cpp b/src/D3D9/Direct3DDevice9.cpp
index 9b68c47..4be7955 100644
--- a/src/D3D9/Direct3DDevice9.cpp
+++ b/src/D3D9/Direct3DDevice9.cpp
@@ -396,7 +396,7 @@
for(unsigned int i = 0; i < count; i++)
{
- sw::SliceRect clearRect(rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2, 0);
+ sw::Rect clearRect(rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2);
clearRect.clip(viewport.X, viewport.Y, viewport.X + viewport.Width, viewport.Y + viewport.Height);
diff --git a/src/Main/BUILD.gn b/src/Main/BUILD.gn
index 150d559..dd85696 100644
--- a/src/Main/BUILD.gn
+++ b/src/Main/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_main_private_config") {
if (is_win) {
@@ -28,11 +30,12 @@
}
} else {
cflags = [ "-msse2" ]
- defines = [ "LOG_TAG=\"swiftshader_main\"" ]
+ defines =
+ [ "NO_SANITIZE_FUNCTION=__attribute__((no_sanitize(\"function\")))" ]
}
}
-source_set("swiftshader_main") {
+swiftshader_source_set("swiftshader_main") {
deps = [
"../Common:swiftshader_common",
]
@@ -59,11 +62,10 @@
}
if (is_win) {
- configs -= [ "//build/config/win:unicode" ]
libs = [ "dxguid.lib" ] # For FrameBufferDD
}
- configs += [ ":swiftshader_main_private_config" ]
+ configs = [ ":swiftshader_main_private_config" ]
include_dirs = [
"..",
diff --git a/src/Main/FrameBufferAndroid.cpp b/src/Main/FrameBufferAndroid.cpp
index 7340921..49957c8 100644
--- a/src/Main/FrameBufferAndroid.cpp
+++ b/src/Main/FrameBufferAndroid.cpp
@@ -15,6 +15,7 @@
#include "FrameBufferAndroid.hpp"
#include "GrallocAndroid.hpp"
+#include <system/window.h>
#include <cutils/log.h>
namespace sw
diff --git a/src/Main/FrameBufferAndroid.hpp b/src/Main/FrameBufferAndroid.hpp
index 7e34ea2..4400188 100644
--- a/src/Main/FrameBufferAndroid.hpp
+++ b/src/Main/FrameBufferAndroid.hpp
@@ -18,8 +18,8 @@
#include "Main/FrameBuffer.hpp"
#include "Common/Debug.hpp"
-#include <hardware/gralloc.h>
-#include <system/window.h>
+struct ANativeWindow;
+struct ANativeWindowBuffer;
namespace sw
{
diff --git a/src/Main/FrameBufferX11.cpp b/src/Main/FrameBufferX11.cpp
index 12b83e4..a065198 100644
--- a/src/Main/FrameBufferX11.cpp
+++ b/src/Main/FrameBufferX11.cpp
@@ -123,7 +123,7 @@
void FrameBufferX11::unlock()
{
- locked = 0;
+ locked = nullptr;
}
void FrameBufferX11::blit(void *source, const Rect *sourceRect, const Rect *destRect, Format sourceFormat, size_t sourceStride)
@@ -143,7 +143,7 @@
}
}
-sw::FrameBuffer *createFrameBuffer(void *display, Window window, int width, int height)
+NO_SANITIZE_FUNCTION sw::FrameBuffer *createFrameBuffer(void *display, Window window, int width, int height)
{
return new sw::FrameBufferX11((::Display*)display, window, width, height);
}
diff --git a/src/OpenGL/common/BUILD.gn b/src/OpenGL/common/BUILD.gn
index cb58ab8..9cc22bf 100644
--- a/src/OpenGL/common/BUILD.gn
+++ b/src/OpenGL/common/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../../swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_opengl_common_private_config") {
if (is_win) {
@@ -20,12 +22,10 @@
"/wd4324", # structure was padded due to alignment specifier
"/wd5030", # attribute is not recognized
]
- } else {
- defines = [ "LOG_TAG=\"swiftshader_opengl_common\"" ]
}
}
-source_set("swiftshader_opengl_common") {
+swiftshader_source_set("swiftshader_opengl_common") {
sources = [
"Image.cpp",
"MatrixStack.cpp",
@@ -33,7 +33,7 @@
"debug.cpp",
]
- configs += [ ":swiftshader_opengl_common_private_config" ]
+ configs = [ ":swiftshader_opengl_common_private_config" ]
include_dirs = [
"..",
diff --git a/src/OpenGL/common/Image.cpp b/src/OpenGL/common/Image.cpp
index d3fe20e..f75f92f 100644
--- a/src/OpenGL/common/Image.cpp
+++ b/src/OpenGL/common/Image.cpp
@@ -263,10 +263,10 @@
for(int x = 0; x < width; x++)
{
unsigned int rgba = source1010102U[x];
- dest16U[4 * x + 0] = (rgba & 0x00000FFC) >> 2;
- dest16U[4 * x + 1] = (rgba & 0x003FF000) >> 12;
- dest16U[4 * x + 2] = (rgba & 0xFFC00000) >> 22;
- dest16U[4 * x + 3] = (rgba & 0x00000003);
+ dest16U[4 * x + 0] = (rgba & 0x000003FF);
+ dest16U[4 * x + 1] = (rgba & 0x000FFC00) >> 10;
+ dest16U[4 * x + 2] = (rgba & 0x3FF00000) >> 20;
+ dest16U[4 * x + 3] = (rgba & 0xC0000000) >> 30;
}
}
diff --git a/src/OpenGL/common/Image.hpp b/src/OpenGL/common/Image.hpp
index c2e7f53..e13b19e 100644
--- a/src/OpenGL/common/Image.hpp
+++ b/src/OpenGL/common/Image.hpp
@@ -22,7 +22,6 @@
#include <GLES2/gl2ext.h>
#if defined(__ANDROID__)
-#include <hardware/gralloc.h>
#include <system/window.h>
#include "../../Common/GrallocAndroid.hpp"
#include "../../Common/DebugAndroid.hpp"
diff --git a/src/OpenGL/common/Object.cpp b/src/OpenGL/common/Object.cpp
index 1a4a7c8..b4d84c0 100644
--- a/src/OpenGL/common/Object.cpp
+++ b/src/OpenGL/common/Object.cpp
@@ -23,6 +23,7 @@
namespace gl
{
#ifndef NDEBUG
+sw::MutexLock Object::instances_mutex;
std::set<Object*> Object::instances;
#endif
@@ -31,6 +32,7 @@
referenceCount = 0;
#ifndef NDEBUG
+ LockGuard instances_lock(instances_mutex);
instances.insert(this);
#endif
}
@@ -40,6 +42,7 @@
ASSERT(referenceCount == 0);
#ifndef NDEBUG
+ LockGuard instances_lock(instances_mutex);
ASSERT(instances.find(this) != instances.end()); // Check for double deletion
instances.erase(this);
#endif
@@ -89,6 +92,7 @@
{
~ObjectLeakCheck()
{
+ LockGuard instances_lock(Object::instances_mutex);
ASSERT(Object::instances.empty()); // Check for GL object leak at termination
}
};
diff --git a/src/OpenGL/common/Object.hpp b/src/OpenGL/common/Object.hpp
index c6243ac..7d9a8fe 100644
--- a/src/OpenGL/common/Object.hpp
+++ b/src/OpenGL/common/Object.hpp
@@ -20,6 +20,7 @@
#define gl_Object_hpp
#include "common/debug.h"
+#include "Common/MutexLock.hpp"
#include <set>
@@ -51,6 +52,7 @@
#ifndef NDEBUG
public:
+ static sw::MutexLock instances_mutex;
static std::set<Object*> instances; // For leak checking
#endif
};
diff --git a/src/OpenGL/compiler/Android.mk b/src/OpenGL/compiler/Android.mk
index 3916255..5bca1fe 100644
--- a/src/OpenGL/compiler/Android.mk
+++ b/src/OpenGL/compiler/Android.mk
@@ -20,6 +20,9 @@
-Wno-unused-parameter \
-Wno-implicit-exception-spec-mismatch \
-Wno-overloaded-virtual \
+ -Wno-attributes \
+ -Wno-unknown-attributes \
+ -Wno-unknown-warning-option \
-fno-operator-names \
-msse2 \
-D__STDC_CONSTANT_MACROS \
diff --git a/src/OpenGL/compiler/BUILD.gn b/src/OpenGL/compiler/BUILD.gn
index 5da9390..3341e1e 100644
--- a/src/OpenGL/compiler/BUILD.gn
+++ b/src/OpenGL/compiler/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../../swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_opengl_compiler_private_config") {
if (is_win) {
@@ -28,7 +30,6 @@
}
} else {
cflags = [ "-Wno-sign-compare" ]
- defines = [ "LOG_TAG=\"swiftshader_opengl_compiler\"" ]
if (!is_debug) {
cflags += [ "-Wno-unused-variable" ] # local variable is initialized but not referenced (variables only used in ASSERTS)
@@ -36,7 +37,7 @@
}
}
-source_set("swiftshader_opengl_compiler") {
+swiftshader_source_set("swiftshader_opengl_compiler") {
deps = [
"preprocessor:swiftshader_opengl_preprocessor",
]
@@ -73,7 +74,7 @@
sources += [ "ossource_win.cpp" ]
}
- configs += [ ":swiftshader_opengl_compiler_private_config" ]
+ configs = [ ":swiftshader_opengl_compiler_private_config" ]
include_dirs = [
"..",
diff --git a/src/OpenGL/compiler/BaseTypes.h b/src/OpenGL/compiler/BaseTypes.h
index 58c0856..01f9948 100644
--- a/src/OpenGL/compiler/BaseTypes.h
+++ b/src/OpenGL/compiler/BaseTypes.h
@@ -369,6 +369,7 @@
EvqPosition,
EvqPointSize,
EvqInstanceID,
+ EvqVertexID,
// built-ins read by fragment shader
EvqFragCoord,
@@ -446,6 +447,7 @@
case EvqPosition: return "Position"; break;
case EvqPointSize: return "PointSize"; break;
case EvqInstanceID: return "InstanceID"; break;
+ case EvqVertexID: return "VertexID"; break;
case EvqFragCoord: return "FragCoord"; break;
case EvqFrontFacing: return "FrontFacing"; break;
case EvqFragColor: return "FragColor"; break;
diff --git a/src/OpenGL/compiler/Initialize.cpp b/src/OpenGL/compiler/Initialize.cpp
index 1948a57..c374531 100644
--- a/src/OpenGL/compiler/Initialize.cpp
+++ b/src/OpenGL/compiler/Initialize.cpp
@@ -471,6 +471,7 @@
symbolTable.insert(COMMON_BUILTINS, *new TVariable(NewPoolTString("gl_Position"), TType(EbtFloat, EbpHigh, EvqPosition, 4)));
symbolTable.insert(COMMON_BUILTINS, *new TVariable(NewPoolTString("gl_PointSize"), TType(EbtFloat, EbpMedium, EvqPointSize, 1)));
symbolTable.insert(ESSL3_BUILTINS, *new TVariable(NewPoolTString("gl_InstanceID"), TType(EbtInt, EbpHigh, EvqInstanceID, 1)));
+ symbolTable.insert(ESSL3_BUILTINS, *new TVariable(NewPoolTString("gl_VertexID"), TType(EbtInt, EbpHigh, EvqVertexID, 1)));
break;
default: assert(false && "Language not supported");
}
diff --git a/src/OpenGL/compiler/OutputASM.cpp b/src/OpenGL/compiler/OutputASM.cpp
index 8a84692..1c70f6c 100644
--- a/src/OpenGL/compiler/OutputASM.cpp
+++ b/src/OpenGL/compiler/OutputASM.cpp
@@ -950,6 +950,7 @@
break;
case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
case EOpLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
+ case EOpBitwiseNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
case EOpPostIncrement:
if(visit == PostVisit)
{
@@ -2554,6 +2555,7 @@
case EvqPosition: return sw::Shader::PARAMETER_OUTPUT;
case EvqPointSize: return sw::Shader::PARAMETER_OUTPUT;
case EvqInstanceID: return sw::Shader::PARAMETER_MISCTYPE;
+ case EvqVertexID: return sw::Shader::PARAMETER_MISCTYPE;
case EvqFragCoord: return sw::Shader::PARAMETER_MISCTYPE;
case EvqFrontFacing: return sw::Shader::PARAMETER_MISCTYPE;
case EvqPointCoord: return sw::Shader::PARAMETER_INPUT;
@@ -2606,9 +2608,10 @@
case EvqConstReadOnly: return temporaryRegister(operand);
case EvqPosition: return varyingRegister(operand);
case EvqPointSize: return varyingRegister(operand);
- case EvqInstanceID: vertexShader->declareInstanceId(); return 0;
- case EvqFragCoord: pixelShader->declareVPos(); return 0;
- case EvqFrontFacing: pixelShader->declareVFace(); return 1;
+ case EvqInstanceID: vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
+ case EvqVertexID: vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
+ case EvqFragCoord: pixelShader->declareVPos(); return sw::Shader::VPosIndex;
+ case EvqFrontFacing: pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
case EvqPointCoord: return varyingRegister(operand);
case EvqFragColor: return 0;
case EvqFragData: return fragmentOutputRegister(operand);
diff --git a/src/OpenGL/compiler/ParseHelper.cpp b/src/OpenGL/compiler/ParseHelper.cpp
index 83f58ce..7cca42c 100644
--- a/src/OpenGL/compiler/ParseHelper.cpp
+++ b/src/OpenGL/compiler/ParseHelper.cpp
@@ -406,6 +406,7 @@
case EvqFrontFacing: message = "can't modify gl_FrontFacing"; break;
case EvqPointCoord: message = "can't modify gl_PointCoord"; break;
case EvqInstanceID: message = "can't modify gl_InstanceID"; break;
+ case EvqVertexID: message = "can't modify gl_VertexID"; break;
default:
//
diff --git a/src/OpenGL/compiler/SymbolTable.cpp b/src/OpenGL/compiler/SymbolTable.cpp
index 89ad4d1..b2e48e8 100644
--- a/src/OpenGL/compiler/SymbolTable.cpp
+++ b/src/OpenGL/compiler/SymbolTable.cpp
@@ -27,8 +27,8 @@
#include <limits.h>
#include <algorithm>
-#if defined(_MSC_VER)
-#define snprintf _snprintf
+#if defined(_MSC_VER) && _MSC_VER < 1900  // Only MSVC older than VS2015 (1900) needs the _snprintf fallback
+#define snprintf _snprintf
#endif
int TSymbolTableLevel::uniqueId = 0;
diff --git a/src/OpenGL/compiler/preprocessor/BUILD.gn b/src/OpenGL/compiler/preprocessor/BUILD.gn
index fadd37e..7c8c2e4 100644
--- a/src/OpenGL/compiler/preprocessor/BUILD.gn
+++ b/src/OpenGL/compiler/preprocessor/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../../../swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_opengl_preprocessor_private_config") {
if (is_win) {
@@ -20,12 +22,10 @@
"/wd4267", # conversion from size_t to int/unsigned int (in autogenerated code)
"/wd4702", # unreachable code (in autogenerated code)
]
- } else {
- defines = [ "LOG_TAG=\"swiftshader_opengl_compiler\"" ]
}
}
-source_set("swiftshader_opengl_preprocessor") {
+swiftshader_source_set("swiftshader_opengl_preprocessor") {
sources = [
"Diagnostics.cpp",
"DirectiveHandler.cpp",
@@ -40,5 +40,5 @@
"Tokenizer.cpp",
]
- configs += [ ":swiftshader_opengl_preprocessor_private_config" ]
+ configs = [ ":swiftshader_opengl_preprocessor_private_config" ]
}
diff --git a/src/OpenGL/libEGL/Android.mk b/src/OpenGL/libEGL/Android.mk
index 8026c7b..9317879 100644
--- a/src/OpenGL/libEGL/Android.mk
+++ b/src/OpenGL/libEGL/Android.mk
@@ -8,7 +8,11 @@
-Wno-unused-parameter \
-Wno-implicit-exception-spec-mismatch \
-Wno-overloaded-virtual \
- -DANDROID_PLATFORM_SDK_VERSION=$(PLATFORM_SDK_VERSION)
+ -Wno-attributes \
+ -Wno-unknown-attributes \
+ -Wno-unknown-warning-option \
+ -DANDROID_PLATFORM_SDK_VERSION=$(PLATFORM_SDK_VERSION) \
+ -DNO_SANITIZE_FUNCTION=
ifneq (16,${PLATFORM_SDK_VERSION})
COMMON_CFLAGS += -Xclang -fuse-init-array
diff --git a/src/OpenGL/libEGL/BUILD.gn b/src/OpenGL/libEGL/BUILD.gn
index 0ce3a8f..543c9cc 100644
--- a/src/OpenGL/libEGL/BUILD.gn
+++ b/src/OpenGL/libEGL/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../../swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_libEGL_private_config") {
defines = [ "EGL_EGLEXT_PROTOTYPES" ]
@@ -23,24 +25,18 @@
"/wd5030", # attribute is not recognized
]
- defines += [
- "EGLAPI=",
- "LIBEGL_EXPORTS",
- ]
+ defines += [ "EGLAPI=" ]
} else {
cflags = [ "-Wno-sign-compare" ]
if (!is_clang) {
cflags += [ "-Wno-unused-but-set-variable" ]
}
- defines += [
- "LOG_TAG=\"swiftshader_libEGL\"",
- "EGLAPI=__attribute__((visibility(\"default\")))",
- ]
+ defines += [ "EGLAPI=__attribute__((visibility(\"default\"))) __attribute__((no_sanitize(\"function\")))" ]
}
}
-shared_library("swiftshader_libEGL") {
+swiftshader_shared_library("swiftshader_libEGL") {
if (!is_mac) {
output_name = "libEGL"
output_dir = "$root_out_dir/swiftshader"
@@ -62,10 +58,6 @@
"resource.h",
]
- if (is_debug) {
- sources += [ "../common/debug.cpp" ]
- }
-
if (is_mac) {
sources += [ "OSXUtils.mm" ]
libs = [
@@ -74,7 +66,6 @@
]
ldflags = [ "-Wl,-install_name,@rpath/libswiftshader_libEGL.dylib" ]
} else if (is_win) {
- configs -= [ "//build/config/win:unicode" ]
ldflags = [ "/DEF:" + rebase_path("libGLESv2.def", root_build_dir) ]
} else if (is_linux) {
sources += [ "../../Main/libX11.cpp" ]
@@ -82,12 +73,7 @@
[ "-Wl,--version-script=" + rebase_path("exports.map", root_build_dir) ]
}
- configs -= [ "//build/config/compiler:chromium_code" ]
- configs += [
- "//build/config/compiler:no_chromium_code",
- "//third_party/swiftshader:swiftshader_config",
- ":swiftshader_libEGL_private_config",
- ]
+ configs = [ ":swiftshader_libEGL_private_config" ]
include_dirs = [
"../../../include",
diff --git a/src/OpenGL/libEGL/Display.cpp b/src/OpenGL/libEGL/Display.cpp
index b08fa65..0ae67bd 100644
--- a/src/OpenGL/libEGL/Display.cpp
+++ b/src/OpenGL/libEGL/Display.cpp
@@ -676,7 +676,10 @@
if(fd != -1)
{
struct fb_var_screeninfo info;
- if(ioctl(fd, FBIOGET_VSCREENINFO, &info) >= 0)
+ int io = ioctl(fd, FBIOGET_VSCREENINFO, &info);
+ close(fd);
+
+ if(io >= 0)
{
switch(info.bits_per_pixel)
{
@@ -716,8 +719,6 @@
UNIMPLEMENTED();
}
}
-
- close(fd);
}
}
diff --git a/src/OpenGL/libEGL/exports.map b/src/OpenGL/libEGL/exports.map
index 487457e..8455dc9 100644
--- a/src/OpenGL/libEGL/exports.map
+++ b/src/OpenGL/libEGL/exports.map
@@ -1,5 +1,6 @@
{
global:
+ # EGL core functions
eglBindAPI;
eglBindTexImage;
eglChooseConfig;
@@ -46,9 +47,13 @@
eglClientWaitSyncKHR;
eglGetSyncAttribKHR;
+ # Table of function pointers to disambiguate between libraries
libEGL_swiftshader;
-local:
- *;
-};
+ # Type-strings and type-infos required by sanitizers
+ _ZTS*;
+ _ZTI*;
+local:
+ *;
+};
diff --git a/src/OpenGL/libEGL/libEGL.vcxproj b/src/OpenGL/libEGL/libEGL.vcxproj
index 01ef9cb..032077b 100644
--- a/src/OpenGL/libEGL/libEGL.vcxproj
+++ b/src/OpenGL/libEGL/libEGL.vcxproj
@@ -119,7 +119,7 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;_DEBUG;_WINDOWS;_USRDLL;LIBEGL_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;DEBUGGER_WAIT_DIALOG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;_DEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;DEBUGGER_WAIT_DIALOG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
@@ -151,7 +151,7 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;_DEBUG;_WINDOWS;_USRDLL;LIBEGL_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;DEBUGGER_WAIT_DIALOG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;_DEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;DEBUGGER_WAIT_DIALOG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeader>
@@ -185,7 +185,7 @@
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBEGL_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -217,7 +217,7 @@
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBEGL_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -251,7 +251,7 @@
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBEGL_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -283,7 +283,7 @@
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBEGL_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;EGL_EGLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
diff --git a/src/OpenGL/libEGL/main.cpp b/src/OpenGL/libEGL/main.cpp
index 7f024d0..3a8d1ce 100644
--- a/src/OpenGL/libEGL/main.cpp
+++ b/src/OpenGL/libEGL/main.cpp
@@ -229,7 +229,7 @@
current->context = ctx;
}
-egl::Context *getCurrentContext()
+NO_SANITIZE_FUNCTION egl::Context *getCurrentContext()
{
Current *current = getCurrent();
diff --git a/src/OpenGL/libGL/Device.cpp b/src/OpenGL/libGL/Device.cpp
index f75b060..736bb99 100644
--- a/src/OpenGL/libGL/Device.cpp
+++ b/src/OpenGL/libGL/Device.cpp
@@ -201,7 +201,7 @@
return;
}
- sw::SliceRect clearRect = renderTarget->getRect();
+ sw::Rect clearRect = renderTarget->getRect();
if(scissorEnable)
{
@@ -225,7 +225,7 @@
}
z = clamp01(z);
- sw::SliceRect clearRect = depthStencil->getRect();
+ sw::Rect clearRect = depthStencil->getRect();
if(scissorEnable)
{
@@ -242,7 +242,7 @@
return;
}
- sw::SliceRect clearRect = depthStencil->getRect();
+ sw::Rect clearRect = depthStencil->getRect();
if(scissorEnable)
{
diff --git a/src/OpenGL/libGLES_CM/Android.mk b/src/OpenGL/libGLES_CM/Android.mk
index 25cef87..8576661 100644
--- a/src/OpenGL/libGLES_CM/Android.mk
+++ b/src/OpenGL/libGLES_CM/Android.mk
@@ -14,6 +14,9 @@
-Wno-unused-parameter \
-Wno-implicit-exception-spec-mismatch \
-Wno-overloaded-virtual \
+ -Wno-attributes \
+ -Wno-unknown-attributes \
+ -Wno-unknown-warning-option \
-DANDROID_PLATFORM_SDK_VERSION=$(PLATFORM_SDK_VERSION)
ifneq (16,${PLATFORM_SDK_VERSION})
diff --git a/src/OpenGL/libGLES_CM/Device.cpp b/src/OpenGL/libGLES_CM/Device.cpp
index cb95d0c..26f53bc 100644
--- a/src/OpenGL/libGLES_CM/Device.cpp
+++ b/src/OpenGL/libGLES_CM/Device.cpp
@@ -172,7 +172,7 @@
rgba[2] = blue;
rgba[3] = alpha;
- sw::SliceRect clearRect = renderTarget->getRect();
+ sw::Rect clearRect = renderTarget->getRect();
if(scissorEnable)
{
@@ -190,7 +190,7 @@
}
z = clamp01(z);
- sw::SliceRect clearRect = depthBuffer->getRect();
+ sw::Rect clearRect = depthBuffer->getRect();
if(scissorEnable)
{
@@ -207,7 +207,7 @@
return;
}
- sw::SliceRect clearRect = stencilBuffer->getRect();
+ sw::Rect clearRect = stencilBuffer->getRect();
if(scissorEnable)
{
diff --git a/src/OpenGL/libGLES_CM/libGLES_CM.vcxproj b/src/OpenGL/libGLES_CM/libGLES_CM.vcxproj
index f809b28..7d2a496 100644
--- a/src/OpenGL/libGLES_CM/libGLES_CM.vcxproj
+++ b/src/OpenGL/libGLES_CM/libGLES_CM.vcxproj
@@ -125,7 +125,7 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>$(SolutionDir)\src;$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;_DEBUG;_WINDOWS;_USRDLL;LIBGLES_CM_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;_DEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
@@ -154,7 +154,7 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>$(SolutionDir)\src;$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;_DEBUG;_WINDOWS;_USRDLL;LIBGLES_CM_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;_DEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeader>
@@ -185,7 +185,7 @@
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBGLES_CM_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -221,7 +221,7 @@
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBGLES_CM_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -259,7 +259,7 @@
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..; $(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBGLES_CM_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -293,7 +293,7 @@
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..; $(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBGLES_CM_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;EGLAPI=;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
diff --git a/src/OpenGL/libGLESv2/Android.mk b/src/OpenGL/libGLESv2/Android.mk
index 3357d2a..4ae7952 100644
--- a/src/OpenGL/libGLESv2/Android.mk
+++ b/src/OpenGL/libGLESv2/Android.mk
@@ -13,7 +13,11 @@
-Wno-unused-parameter \
-Wno-implicit-exception-spec-mismatch \
-Wno-overloaded-virtual \
- -DANDROID_PLATFORM_SDK_VERSION=$(PLATFORM_SDK_VERSION)
+ -Wno-attributes \
+ -Wno-unknown-attributes \
+ -Wno-unknown-warning-option \
+ -DANDROID_PLATFORM_SDK_VERSION=$(PLATFORM_SDK_VERSION) \
+ -DNO_SANITIZE_FUNCTION=
ifneq (16,${PLATFORM_SDK_VERSION})
COMMON_CFLAGS += -Xclang -fuse-init-array
diff --git a/src/OpenGL/libGLESv2/BUILD.gn b/src/OpenGL/libGLESv2/BUILD.gn
index 102d825..a917cfd 100644
--- a/src/OpenGL/libGLESv2/BUILD.gn
+++ b/src/OpenGL/libGLESv2/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../../swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_libGLESv2_private_config") {
defines = [
@@ -26,10 +28,7 @@
"/wd5030", # attribute is not recognized
]
- defines += [
- "GL_APICALL=",
- "LIBGLESV2_EXPORTS",
- ]
+ defines += [ "GL_APICALL=" ]
if (is_clang) {
defines += [
@@ -44,15 +43,14 @@
}
defines += [
- "LOG_TAG=\"swiftshader_libGLESv2\"",
"__STDC_CONSTANT_MACROS",
"__STDC_LIMIT_MACROS",
- "GL_APICALL=__attribute__((visibility(\"default\")))",
+ "GL_APICALL=__attribute__((visibility(\"default\"))) __attribute__((no_sanitize(\"function\")))",
]
}
}
-shared_library("swiftshader_libGLESv2") {
+swiftshader_shared_library("swiftshader_libGLESv2") {
if (!is_mac) {
output_name = "libGLESv2"
output_dir = "$root_out_dir/swiftshader"
@@ -91,7 +89,6 @@
]
if (is_win) {
- configs -= [ "//build/config/win:unicode" ]
ldflags = [ "/DEF:" + rebase_path("libGLESv2.def", root_build_dir) ]
} else if (is_mac) {
ldflags = [ "-Wl,-install_name,@rpath/libswiftshader_libGLESv2.dylib" ]
@@ -100,12 +97,7 @@
[ "-Wl,--version-script=" + rebase_path("exports.map", root_build_dir) ]
}
- configs -= [ "//build/config/compiler:chromium_code" ]
- configs += [
- "//build/config/compiler:no_chromium_code",
- "//third_party/swiftshader:swiftshader_config",
- ":swiftshader_libGLESv2_private_config",
- ]
+ configs = [ ":swiftshader_libGLESv2_private_config" ]
include_dirs = [
"../../../include",
diff --git a/src/OpenGL/libGLESv2/Context.cpp b/src/OpenGL/libGLESv2/Context.cpp
index 84196ac..3cd11af 100644
--- a/src/OpenGL/libGLESv2/Context.cpp
+++ b/src/OpenGL/libGLESv2/Context.cpp
@@ -100,6 +100,7 @@
mState.rasterizerDiscardEnabled = false;
mState.generateMipmapHint = GL_DONT_CARE;
mState.fragmentShaderDerivativeHint = GL_DONT_CARE;
+ mState.textureFilteringHint = GL_DONT_CARE;
mState.lineWidth = 1.0f;
@@ -245,6 +246,11 @@
mState.pixelPackBuffer = nullptr;
mState.pixelUnpackBuffer = nullptr;
mState.genericUniformBuffer = nullptr;
+
+ for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+ mState.uniformBuffers[i].set(nullptr, 0, 0);
+ }
+
mState.renderbuffer = nullptr;
for(int i = 0; i < MAX_COMBINED_TEXTURE_IMAGE_UNITS; ++i)
@@ -677,6 +683,11 @@
// Ignore for now. It is valid for implementations to ignore hint.
}
+void Context::setTextureFilteringHint(GLenum hint)
+{
+ mState.textureFilteringHint = hint;   // stored without validation here; later compared with GL_NICEST to enable high-precision filtering
+}
+
void Context::setViewportParams(GLint x, GLint y, GLsizei width, GLsizei height)
{
mState.viewportX = x;
@@ -1885,6 +1896,7 @@
case GL_UNPACK_ALIGNMENT: *params = mState.unpackInfo.alignment; return true;
case GL_GENERATE_MIPMAP_HINT: *params = mState.generateMipmapHint; return true;
case GL_FRAGMENT_SHADER_DERIVATIVE_HINT_OES: *params = mState.fragmentShaderDerivativeHint; return true;
+ case GL_TEXTURE_FILTERING_HINT_CHROMIUM: *params = mState.textureFilteringHint; return true;
case GL_ACTIVE_TEXTURE: *params = (mState.activeSampler + GL_TEXTURE0); return true;
case GL_STENCIL_FUNC: *params = mState.stencilFunc; return true;
case GL_STENCIL_REF: *params = mState.stencilRef; return true;
@@ -2420,6 +2432,7 @@
case GL_UNPACK_ALIGNMENT:
case GL_GENERATE_MIPMAP_HINT:
case GL_FRAGMENT_SHADER_DERIVATIVE_HINT_OES:
+ case GL_TEXTURE_FILTERING_HINT_CHROMIUM:
case GL_RED_BITS:
case GL_GREEN_BITS:
case GL_BLUE_BITS:
@@ -2976,9 +2989,9 @@
mAppliedProgramSerial = programObject->getSerial();
}
- programObject->applyTransformFeedback(getTransformFeedback());
- programObject->applyUniformBuffers(mState.uniformBuffers);
- programObject->applyUniforms();
+ programObject->applyTransformFeedback(device, getTransformFeedback());
+ programObject->applyUniformBuffers(device, mState.uniformBuffers);
+ programObject->applyUniforms(device);
}
void Context::applyTextures()
@@ -3053,6 +3066,7 @@
device->setTextureFilter(samplerType, samplerIndex, es2sw::ConvertTextureFilter(minFilter, magFilter, maxAnisotropy));
device->setMipmapFilter(samplerType, samplerIndex, es2sw::ConvertMipMapFilter(minFilter));
device->setMaxAnisotropy(samplerType, samplerIndex, maxAnisotropy);
+ device->setHighPrecisionFiltering(samplerType, samplerIndex, mState.textureFilteringHint == GL_NICEST);
applyTexture(samplerType, samplerIndex, texture);
}
@@ -3315,7 +3329,7 @@
if(colorbuffer)
{
- sw::SliceRect clearRect = colorbuffer->getRect();
+ sw::Rect clearRect = colorbuffer->getRect();
if(mState.scissorTestEnabled)
{
@@ -3354,7 +3368,7 @@
if(depthbuffer)
{
float depth = clamp01(value);
- sw::SliceRect clearRect = depthbuffer->getRect();
+ sw::Rect clearRect = depthbuffer->getRect();
if(mState.scissorTestEnabled)
{
@@ -3378,7 +3392,7 @@
if(stencilbuffer)
{
unsigned char stencil = value < 0 ? 0 : static_cast<unsigned char>(value & 0x000000FF);
- sw::SliceRect clearRect = stencilbuffer->getRect();
+ sw::Rect clearRect = stencilbuffer->getRect();
if(mState.scissorTestEnabled)
{
@@ -4300,6 +4314,7 @@
"GL_OES_texture_half_float_linear",
"GL_OES_texture_npot",
"GL_OES_texture_3D",
+ "GL_OES_vertex_half_float",
"GL_EXT_blend_minmax",
"GL_EXT_color_buffer_half_float",
"GL_EXT_draw_buffers",
@@ -4319,6 +4334,7 @@
"GL_ANGLE_texture_compression_dxt3",
"GL_ANGLE_texture_compression_dxt5",
#endif
+ "GL_CHROMIUM_texture_filtering_hint",
"GL_NV_fence",
"GL_NV_framebuffer_blit",
"GL_NV_read_depth",
@@ -4355,6 +4371,14 @@
{
extensionsCat += std::string(extension) + " ";
}
+
+ if(clientVersion >= 3)   // entries of es3extensions are appended only for contexts of version 3 or higher
+ {
+ for(const char *extension : es3extensions)
+ {
+ extensionsCat += std::string(extension) + " ";   // space-separated; a trailing space follows the last name
+ }
+ }
}
return (const GLubyte*)extensionsCat.c_str();
@@ -4377,7 +4401,7 @@
}
-egl::Context *es2CreateContext(egl::Display *display, const egl::Context *shareContext, int clientVersion, const egl::Config *config)
+NO_SANITIZE_FUNCTION egl::Context *es2CreateContext(egl::Display *display, const egl::Context *shareContext, int clientVersion, const egl::Config *config)
{
ASSERT(!shareContext || shareContext->getClientVersion() == clientVersion); // Should be checked by eglCreateContext
return new es2::Context(display, static_cast<const es2::Context*>(shareContext), clientVersion, config);
diff --git a/src/OpenGL/libGLESv2/Context.h b/src/OpenGL/libGLESv2/Context.h
index bbe6ddd..16e0aa2 100644
--- a/src/OpenGL/libGLESv2/Context.h
+++ b/src/OpenGL/libGLESv2/Context.h
@@ -156,6 +156,8 @@
#endif
};
+const GLenum GL_TEXTURE_FILTERING_HINT_CHROMIUM = 0x8AF0;   // query name that returns the context's textureFilteringHint; NOTE(review): presumably also the glHint target of GL_CHROMIUM_texture_filtering_hint - confirm
+
const GLint NUM_COMPRESSED_TEXTURE_FORMATS = sizeof(compressedTextureFormats) / sizeof(compressedTextureFormats[0]);
const GLint multisampleCount[] = {4, 2, 1};
@@ -376,6 +378,7 @@
GLenum generateMipmapHint;
GLenum fragmentShaderDerivativeHint;
+ GLenum textureFilteringHint;
GLint viewportX;
GLint viewportY;
@@ -489,6 +492,7 @@
void setGenerateMipmapHint(GLenum hint);
void setFragmentShaderDerivativeHint(GLenum hint);
+ void setTextureFilteringHint(GLenum hint);
void setViewportParams(GLint x, GLint y, GLsizei width, GLsizei height);
diff --git a/src/OpenGL/libGLESv2/Device.cpp b/src/OpenGL/libGLESv2/Device.cpp
index 8b8f016..53c794c 100644
--- a/src/OpenGL/libGLESv2/Device.cpp
+++ b/src/OpenGL/libGLESv2/Device.cpp
@@ -202,18 +202,14 @@
{
if(renderTarget[i])
{
- sw::SliceRect clearRect = renderTarget[i]->getRect();
+ sw::Rect clearRect = renderTarget[i]->getRect();
if(scissorEnable)
{
clearRect.clip(scissorRect.x0, scissorRect.y0, scissorRect.x1, scissorRect.y1);
}
- int depth = sw::max(renderTarget[i]->getDepth(), 1);
- for(clearRect.slice = 0; clearRect.slice < depth; clearRect.slice++)
- {
- clear(rgba, FORMAT_A32B32G32R32F, renderTarget[i], clearRect, rgbaMask);
- }
+ clear(rgba, FORMAT_A32B32G32R32F, renderTarget[i], clearRect, rgbaMask);
}
}
}
@@ -226,7 +222,7 @@
}
z = clamp01(z);
- sw::SliceRect clearRect = depthBuffer->getRect();
+ sw::Rect clearRect = depthBuffer->getRect();
if(scissorEnable)
{
@@ -243,7 +239,7 @@
return;
}
- sw::SliceRect clearRect = stencilBuffer->getRect();
+ sw::Rect clearRect = stencilBuffer->getRect();
if(scissorEnable)
{
diff --git a/src/OpenGL/libGLESv2/Program.cpp b/src/OpenGL/libGLESv2/Program.cpp
index 400da5d..24d1f3c 100644
--- a/src/OpenGL/libGLESv2/Program.cpp
+++ b/src/OpenGL/libGLESv2/Program.cpp
@@ -151,8 +151,6 @@
Program::Program(ResourceManager *manager, GLuint handle) : serial(issueSerial()), resourceManager(manager), handle(handle)
{
- device = getDevice();
-
fragmentShader = 0;
vertexShader = 0;
pixelBinary = 0;
@@ -1064,7 +1062,7 @@
}
// Applies all the uniforms set for this program object to the device
- void Program::applyUniforms()
+ void Program::applyUniforms(Device *device)
{
GLint numUniforms = static_cast<GLint>(uniformIndex.size());
for(GLint location = 0; location < numUniforms; location++)
@@ -1086,23 +1084,23 @@
switch(targetUniform->type)
{
- case GL_BOOL: applyUniform1bv(location, size, b); break;
- case GL_BOOL_VEC2: applyUniform2bv(location, size, b); break;
- case GL_BOOL_VEC3: applyUniform3bv(location, size, b); break;
- case GL_BOOL_VEC4: applyUniform4bv(location, size, b); break;
- case GL_FLOAT: applyUniform1fv(location, size, f); break;
- case GL_FLOAT_VEC2: applyUniform2fv(location, size, f); break;
- case GL_FLOAT_VEC3: applyUniform3fv(location, size, f); break;
- case GL_FLOAT_VEC4: applyUniform4fv(location, size, f); break;
- case GL_FLOAT_MAT2: applyUniformMatrix2fv(location, size, f); break;
- case GL_FLOAT_MAT2x3: applyUniformMatrix2x3fv(location, size, f); break;
- case GL_FLOAT_MAT2x4: applyUniformMatrix2x4fv(location, size, f); break;
- case GL_FLOAT_MAT3x2: applyUniformMatrix3x2fv(location, size, f); break;
- case GL_FLOAT_MAT3: applyUniformMatrix3fv(location, size, f); break;
- case GL_FLOAT_MAT3x4: applyUniformMatrix3x4fv(location, size, f); break;
- case GL_FLOAT_MAT4x2: applyUniformMatrix4x2fv(location, size, f); break;
- case GL_FLOAT_MAT4x3: applyUniformMatrix4x3fv(location, size, f); break;
- case GL_FLOAT_MAT4: applyUniformMatrix4fv(location, size, f); break;
+ case GL_BOOL: applyUniform1bv(device, location, size, b); break;
+ case GL_BOOL_VEC2: applyUniform2bv(device, location, size, b); break;
+ case GL_BOOL_VEC3: applyUniform3bv(device, location, size, b); break;
+ case GL_BOOL_VEC4: applyUniform4bv(device, location, size, b); break;
+ case GL_FLOAT: applyUniform1fv(device, location, size, f); break;
+ case GL_FLOAT_VEC2: applyUniform2fv(device, location, size, f); break;
+ case GL_FLOAT_VEC3: applyUniform3fv(device, location, size, f); break;
+ case GL_FLOAT_VEC4: applyUniform4fv(device, location, size, f); break;
+ case GL_FLOAT_MAT2: applyUniformMatrix2fv(device, location, size, f); break;
+ case GL_FLOAT_MAT2x3: applyUniformMatrix2x3fv(device, location, size, f); break;
+ case GL_FLOAT_MAT2x4: applyUniformMatrix2x4fv(device, location, size, f); break;
+ case GL_FLOAT_MAT3x2: applyUniformMatrix3x2fv(device, location, size, f); break;
+ case GL_FLOAT_MAT3: applyUniformMatrix3fv(device, location, size, f); break;
+ case GL_FLOAT_MAT3x4: applyUniformMatrix3x4fv(device, location, size, f); break;
+ case GL_FLOAT_MAT4x2: applyUniformMatrix4x2fv(device, location, size, f); break;
+ case GL_FLOAT_MAT4x3: applyUniformMatrix4x3fv(device, location, size, f); break;
+ case GL_FLOAT_MAT4: applyUniformMatrix4fv(device, location, size, f); break;
case GL_SAMPLER_2D:
case GL_SAMPLER_CUBE:
case GL_SAMPLER_EXTERNAL_OES:
@@ -1119,14 +1117,14 @@
case GL_UNSIGNED_INT_SAMPLER_3D:
case GL_INT_SAMPLER_2D_ARRAY:
case GL_UNSIGNED_INT_SAMPLER_2D_ARRAY:
- case GL_INT: applyUniform1iv(location, size, i); break;
- case GL_INT_VEC2: applyUniform2iv(location, size, i); break;
- case GL_INT_VEC3: applyUniform3iv(location, size, i); break;
- case GL_INT_VEC4: applyUniform4iv(location, size, i); break;
- case GL_UNSIGNED_INT: applyUniform1uiv(location, size, ui); break;
- case GL_UNSIGNED_INT_VEC2: applyUniform2uiv(location, size, ui); break;
- case GL_UNSIGNED_INT_VEC3: applyUniform3uiv(location, size, ui); break;
- case GL_UNSIGNED_INT_VEC4: applyUniform4uiv(location, size, ui); break;
+ case GL_INT: applyUniform1iv(device, location, size, i); break;
+ case GL_INT_VEC2: applyUniform2iv(device, location, size, i); break;
+ case GL_INT_VEC3: applyUniform3iv(device, location, size, i); break;
+ case GL_INT_VEC4: applyUniform4iv(device, location, size, i); break;
+ case GL_UNSIGNED_INT: applyUniform1uiv(device, location, size, ui); break;
+ case GL_UNSIGNED_INT_VEC2: applyUniform2uiv(device, location, size, ui); break;
+ case GL_UNSIGNED_INT_VEC3: applyUniform3uiv(device, location, size, ui); break;
+ case GL_UNSIGNED_INT_VEC4: applyUniform4uiv(device, location, size, ui); break;
default:
UNREACHABLE(targetUniform->type);
}
@@ -1136,7 +1134,7 @@
}
}
- void Program::applyUniformBuffers(BufferBinding* uniformBuffers)
+ void Program::applyUniformBuffers(Device *device, BufferBinding* uniformBuffers)
{
GLint vertexUniformBuffers[MAX_UNIFORM_BUFFER_BINDINGS];
GLint fragmentUniformBuffers[MAX_UNIFORM_BUFFER_BINDINGS];
@@ -1189,7 +1187,7 @@
}
}
- void Program::applyTransformFeedback(TransformFeedback* transformFeedback)
+ void Program::applyTransformFeedback(Device *device, TransformFeedback* transformFeedback)
{
// Make sure the flags will fit in a 64 bit unsigned int variable
ASSERT(sw::max<int>(MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS, sw::MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS) <= 64);
@@ -1930,7 +1928,7 @@
return true;
}
- bool Program::applyUniform(GLint location, float* data)
+ bool Program::applyUniform(Device *device, GLint location, float* data)
{
Uniform *targetUniform = uniforms[uniformIndex[location].index];
@@ -1947,7 +1945,7 @@
return true;
}
- bool Program::applyUniform1bv(GLint location, GLsizei count, const GLboolean *v)
+ bool Program::applyUniform1bv(Device *device, GLint location, GLsizei count, const GLboolean *v)
{
int vector[MAX_UNIFORM_VECTORS][4];
@@ -1961,10 +1959,10 @@
v += 1;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform2bv(GLint location, GLsizei count, const GLboolean *v)
+ bool Program::applyUniform2bv(Device *device, GLint location, GLsizei count, const GLboolean *v)
{
int vector[MAX_UNIFORM_VECTORS][4];
@@ -1978,10 +1976,10 @@
v += 2;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform3bv(GLint location, GLsizei count, const GLboolean *v)
+ bool Program::applyUniform3bv(Device *device, GLint location, GLsizei count, const GLboolean *v)
{
int vector[MAX_UNIFORM_VECTORS][4];
@@ -1995,10 +1993,10 @@
v += 3;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform4bv(GLint location, GLsizei count, const GLboolean *v)
+ bool Program::applyUniform4bv(Device *device, GLint location, GLsizei count, const GLboolean *v)
{
int vector[MAX_UNIFORM_VECTORS][4];
@@ -2012,10 +2010,10 @@
v += 4;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform1fv(GLint location, GLsizei count, const GLfloat *v)
+ bool Program::applyUniform1fv(Device *device, GLint location, GLsizei count, const GLfloat *v)
{
float vector[MAX_UNIFORM_VECTORS][4];
@@ -2029,10 +2027,10 @@
v += 1;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform2fv(GLint location, GLsizei count, const GLfloat *v)
+ bool Program::applyUniform2fv(Device *device, GLint location, GLsizei count, const GLfloat *v)
{
float vector[MAX_UNIFORM_VECTORS][4];
@@ -2046,10 +2044,10 @@
v += 2;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform3fv(GLint location, GLsizei count, const GLfloat *v)
+ bool Program::applyUniform3fv(Device *device, GLint location, GLsizei count, const GLfloat *v)
{
float vector[MAX_UNIFORM_VECTORS][4];
@@ -2063,15 +2061,15 @@
v += 3;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform4fv(GLint location, GLsizei count, const GLfloat *v)
+ bool Program::applyUniform4fv(Device *device, GLint location, GLsizei count, const GLfloat *v)
{
- return applyUniform(location, (float*)v);
+ return applyUniform(device, location, (float*)v);   // v is forwarded as-is and count is unused; NOTE(review): presumably vec4 input already has the 4-wide layout the smaller variants repack into - confirm
}
- bool Program::applyUniformMatrix2fv(GLint location, GLsizei count, const GLfloat *value)
+ bool Program::applyUniformMatrix2fv(Device *device, GLint location, GLsizei count, const GLfloat *value)
{
float matrix[(MAX_UNIFORM_VECTORS + 1) / 2][2][4];
@@ -2083,10 +2081,10 @@
value += 4;
}
- return applyUniform(location, (float*)matrix);
+ return applyUniform(device, location, (float*)matrix);
}
- bool Program::applyUniformMatrix2x3fv(GLint location, GLsizei count, const GLfloat *value)
+ bool Program::applyUniformMatrix2x3fv(Device *device, GLint location, GLsizei count, const GLfloat *value)
{
float matrix[(MAX_UNIFORM_VECTORS + 1) / 2][2][4];
@@ -2098,10 +2096,10 @@
value += 6;
}
- return applyUniform(location, (float*)matrix);
+ return applyUniform(device, location, (float*)matrix);
}
- bool Program::applyUniformMatrix2x4fv(GLint location, GLsizei count, const GLfloat *value)
+ bool Program::applyUniformMatrix2x4fv(Device *device, GLint location, GLsizei count, const GLfloat *value)
{
float matrix[(MAX_UNIFORM_VECTORS + 1) / 2][2][4];
@@ -2113,10 +2111,10 @@
value += 8;
}
- return applyUniform(location, (float*)matrix);
+ return applyUniform(device, location, (float*)matrix);
}
- bool Program::applyUniformMatrix3fv(GLint location, GLsizei count, const GLfloat *value)
+ bool Program::applyUniformMatrix3fv(Device *device, GLint location, GLsizei count, const GLfloat *value)
{
float matrix[(MAX_UNIFORM_VECTORS + 2) / 3][3][4];
@@ -2129,10 +2127,10 @@
value += 9;
}
- return applyUniform(location, (float*)matrix);
+ return applyUniform(device, location, (float*)matrix);
}
- bool Program::applyUniformMatrix3x2fv(GLint location, GLsizei count, const GLfloat *value)
+ bool Program::applyUniformMatrix3x2fv(Device *device, GLint location, GLsizei count, const GLfloat *value)
{
float matrix[(MAX_UNIFORM_VECTORS + 2) / 3][3][4];
@@ -2145,10 +2143,10 @@
value += 6;
}
- return applyUniform(location, (float*)matrix);
+ return applyUniform(device, location, (float*)matrix);
}
- bool Program::applyUniformMatrix3x4fv(GLint location, GLsizei count, const GLfloat *value)
+ bool Program::applyUniformMatrix3x4fv(Device *device, GLint location, GLsizei count, const GLfloat *value)
{
float matrix[(MAX_UNIFORM_VECTORS + 2) / 3][3][4];
@@ -2161,15 +2159,15 @@
value += 12;
}
- return applyUniform(location, (float*)matrix);
+ return applyUniform(device, location, (float*)matrix);
}
- bool Program::applyUniformMatrix4fv(GLint location, GLsizei count, const GLfloat *value)
+ bool Program::applyUniformMatrix4fv(Device *device, GLint location, GLsizei count, const GLfloat *value)
{
- return applyUniform(location, (float*)value);
+ return applyUniform(device, location, (float*)value);   // value is forwarded as-is and count is unused; NOTE(review): presumably mat4 input already matches the [4][4] layout the other matrix variants repack into - confirm
}
- bool Program::applyUniformMatrix4x2fv(GLint location, GLsizei count, const GLfloat *value)
+ bool Program::applyUniformMatrix4x2fv(Device *device, GLint location, GLsizei count, const GLfloat *value)
{
float matrix[(MAX_UNIFORM_VECTORS + 3) / 4][4][4];
@@ -2183,10 +2181,10 @@
value += 8;
}
- return applyUniform(location, (float*)matrix);
+ return applyUniform(device, location, (float*)matrix);
}
- bool Program::applyUniformMatrix4x3fv(GLint location, GLsizei count, const GLfloat *value)
+ bool Program::applyUniformMatrix4x3fv(Device *device, GLint location, GLsizei count, const GLfloat *value)
{
float matrix[(MAX_UNIFORM_VECTORS + 3) / 4][4][4];
@@ -2200,10 +2198,10 @@
value += 12;
}
- return applyUniform(location, (float*)matrix);
+ return applyUniform(device, location, (float*)matrix);
}
- bool Program::applyUniform1iv(GLint location, GLsizei count, const GLint *v)
+ bool Program::applyUniform1iv(Device *device, GLint location, GLsizei count, const GLint *v)
{
GLint vector[MAX_UNIFORM_VECTORS][4];
@@ -2248,13 +2246,13 @@
}
else
{
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
return true;
}
- bool Program::applyUniform2iv(GLint location, GLsizei count, const GLint *v)
+ bool Program::applyUniform2iv(Device *device, GLint location, GLsizei count, const GLint *v)
{
GLint vector[MAX_UNIFORM_VECTORS][4];
@@ -2268,10 +2266,10 @@
v += 2;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform3iv(GLint location, GLsizei count, const GLint *v)
+ bool Program::applyUniform3iv(Device *device, GLint location, GLsizei count, const GLint *v)
{
GLint vector[MAX_UNIFORM_VECTORS][4];
@@ -2285,10 +2283,10 @@
v += 3;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform4iv(GLint location, GLsizei count, const GLint *v)
+ bool Program::applyUniform4iv(Device *device, GLint location, GLsizei count, const GLint *v)
{
GLint vector[MAX_UNIFORM_VECTORS][4];
@@ -2302,10 +2300,10 @@
v += 4;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform1uiv(GLint location, GLsizei count, const GLuint *v)
+ bool Program::applyUniform1uiv(Device *device, GLint location, GLsizei count, const GLuint *v)
{
GLuint vector[MAX_UNIFORM_VECTORS][4];
@@ -2350,13 +2348,13 @@
}
else
{
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
return true;
}
- bool Program::applyUniform2uiv(GLint location, GLsizei count, const GLuint *v)
+ bool Program::applyUniform2uiv(Device *device, GLint location, GLsizei count, const GLuint *v)
{
GLuint vector[MAX_UNIFORM_VECTORS][4];
@@ -2370,10 +2368,10 @@
v += 2;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform3uiv(GLint location, GLsizei count, const GLuint *v)
+ bool Program::applyUniform3uiv(Device *device, GLint location, GLsizei count, const GLuint *v)
{
GLuint vector[MAX_UNIFORM_VECTORS][4];
@@ -2387,10 +2385,10 @@
v += 3;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
- bool Program::applyUniform4uiv(GLint location, GLsizei count, const GLuint *v)
+ bool Program::applyUniform4uiv(Device *device, GLint location, GLsizei count, const GLuint *v)
{
GLuint vector[MAX_UNIFORM_VECTORS][4];
@@ -2404,7 +2402,7 @@
v += 4;
}
- return applyUniform(location, (float*)vector);
+ return applyUniform(device, location, (float*)vector);
}
void Program::appendToInfoLog(const char *format, ...)
@@ -2875,7 +2873,7 @@
return orphaned;
}
- void Program::validate()
+ void Program::validate(Device* device)
{
resetInfoLog();
@@ -2886,7 +2884,7 @@
}
else
{
- applyUniforms();
+ applyUniforms(device);
if(!validateSamplers(true))
{
validated = false;
diff --git a/src/OpenGL/libGLESv2/Program.h b/src/OpenGL/libGLESv2/Program.h
index ff4bc5f..56f7df7 100644
--- a/src/OpenGL/libGLESv2/Program.h
+++ b/src/OpenGL/libGLESv2/Program.h
@@ -172,9 +172,9 @@
bool getUniformuiv(GLint location, GLsizei *bufSize, GLuint *params);
void dirtyAllUniforms();
- void applyUniforms();
- void applyUniformBuffers(BufferBinding* uniformBuffers);
- void applyTransformFeedback(TransformFeedback* transformFeedback);
+ void applyUniforms(Device *device);
+ void applyUniformBuffers(Device *device, BufferBinding* uniformBuffers);
+ void applyTransformFeedback(Device *device, TransformFeedback* transformFeedback);
void link();
bool isLinked() const;
@@ -207,7 +207,7 @@
void flagForDeletion();
bool isFlaggedForDeletion() const;
- void validate();
+ void validate(Device* device);
bool validateSamplers(bool logErrors);
bool isValidated() const;
@@ -232,32 +232,32 @@
bool areMatchingUniformBlocks(const glsl::UniformBlock &block1, const glsl::UniformBlock &block2, const Shader *shader1, const Shader *shader2);
bool defineUniform(GLenum shader, GLenum type, GLenum precision, const std::string &_name, unsigned int arraySize, int registerIndex, const Uniform::BlockInfo& blockInfo);
bool defineUniformBlock(const Shader *shader, const glsl::UniformBlock &block);
- bool applyUniform(GLint location, float* data);
- bool applyUniform1bv(GLint location, GLsizei count, const GLboolean *v);
- bool applyUniform2bv(GLint location, GLsizei count, const GLboolean *v);
- bool applyUniform3bv(GLint location, GLsizei count, const GLboolean *v);
- bool applyUniform4bv(GLint location, GLsizei count, const GLboolean *v);
- bool applyUniform1fv(GLint location, GLsizei count, const GLfloat *v);
- bool applyUniform2fv(GLint location, GLsizei count, const GLfloat *v);
- bool applyUniform3fv(GLint location, GLsizei count, const GLfloat *v);
- bool applyUniform4fv(GLint location, GLsizei count, const GLfloat *v);
- bool applyUniformMatrix2fv(GLint location, GLsizei count, const GLfloat *value);
- bool applyUniformMatrix2x3fv(GLint location, GLsizei count, const GLfloat *value);
- bool applyUniformMatrix2x4fv(GLint location, GLsizei count, const GLfloat *value);
- bool applyUniformMatrix3fv(GLint location, GLsizei count, const GLfloat *value);
- bool applyUniformMatrix3x2fv(GLint location, GLsizei count, const GLfloat *value);
- bool applyUniformMatrix3x4fv(GLint location, GLsizei count, const GLfloat *value);
- bool applyUniformMatrix4fv(GLint location, GLsizei count, const GLfloat *value);
- bool applyUniformMatrix4x2fv(GLint location, GLsizei count, const GLfloat *value);
- bool applyUniformMatrix4x3fv(GLint location, GLsizei count, const GLfloat *value);
- bool applyUniform1iv(GLint location, GLsizei count, const GLint *v);
- bool applyUniform2iv(GLint location, GLsizei count, const GLint *v);
- bool applyUniform3iv(GLint location, GLsizei count, const GLint *v);
- bool applyUniform4iv(GLint location, GLsizei count, const GLint *v);
- bool applyUniform1uiv(GLint location, GLsizei count, const GLuint *v);
- bool applyUniform2uiv(GLint location, GLsizei count, const GLuint *v);
- bool applyUniform3uiv(GLint location, GLsizei count, const GLuint *v);
- bool applyUniform4uiv(GLint location, GLsizei count, const GLuint *v);
+ bool applyUniform(Device *device, GLint location, float* data);
+ bool applyUniform1bv(Device *device, GLint location, GLsizei count, const GLboolean *v);
+ bool applyUniform2bv(Device *device, GLint location, GLsizei count, const GLboolean *v);
+ bool applyUniform3bv(Device *device, GLint location, GLsizei count, const GLboolean *v);
+ bool applyUniform4bv(Device *device, GLint location, GLsizei count, const GLboolean *v);
+ bool applyUniform1fv(Device *device, GLint location, GLsizei count, const GLfloat *v);
+ bool applyUniform2fv(Device *device, GLint location, GLsizei count, const GLfloat *v);
+ bool applyUniform3fv(Device *device, GLint location, GLsizei count, const GLfloat *v);
+ bool applyUniform4fv(Device *device, GLint location, GLsizei count, const GLfloat *v);
+ bool applyUniformMatrix2fv(Device *device, GLint location, GLsizei count, const GLfloat *value);
+ bool applyUniformMatrix2x3fv(Device *device, GLint location, GLsizei count, const GLfloat *value);
+ bool applyUniformMatrix2x4fv(Device *device, GLint location, GLsizei count, const GLfloat *value);
+ bool applyUniformMatrix3fv(Device *device, GLint location, GLsizei count, const GLfloat *value);
+ bool applyUniformMatrix3x2fv(Device *device, GLint location, GLsizei count, const GLfloat *value);
+ bool applyUniformMatrix3x4fv(Device *device, GLint location, GLsizei count, const GLfloat *value);
+ bool applyUniformMatrix4fv(Device *device, GLint location, GLsizei count, const GLfloat *value);
+ bool applyUniformMatrix4x2fv(Device *device, GLint location, GLsizei count, const GLfloat *value);
+ bool applyUniformMatrix4x3fv(Device *device, GLint location, GLsizei count, const GLfloat *value);
+ bool applyUniform1iv(Device *device, GLint location, GLsizei count, const GLint *v);
+ bool applyUniform2iv(Device *device, GLint location, GLsizei count, const GLint *v);
+ bool applyUniform3iv(Device *device, GLint location, GLsizei count, const GLint *v);
+ bool applyUniform4iv(Device *device, GLint location, GLsizei count, const GLint *v);
+ bool applyUniform1uiv(Device *device, GLint location, GLsizei count, const GLuint *v);
+ bool applyUniform2uiv(Device *device, GLint location, GLsizei count, const GLuint *v);
+ bool applyUniform3uiv(Device *device, GLint location, GLsizei count, const GLuint *v);
+ bool applyUniform4uiv(Device *device, GLint location, GLsizei count, const GLuint *v);
bool setUniformfv(GLint location, GLsizei count, const GLfloat *v, int numElements);
bool setUniformMatrixfv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value, GLenum type);
@@ -270,7 +270,6 @@
static unsigned int issueSerial();
private:
- es2::Device *device;
FragmentShader *fragmentShader;
VertexShader *vertexShader;
diff --git a/src/OpenGL/libGLESv2/Texture.cpp b/src/OpenGL/libGLESv2/Texture.cpp
index bf83472..f20b030 100644
--- a/src/OpenGL/libGLESv2/Texture.cpp
+++ b/src/OpenGL/libGLESv2/Texture.cpp
@@ -1975,7 +1975,7 @@
}
-egl::Image *createBackBuffer(int width, int height, sw::Format format, int multiSampleDepth)
+NO_SANITIZE_FUNCTION egl::Image *createBackBuffer(int width, int height, sw::Format format, int multiSampleDepth)
{
if(width > es2::IMPLEMENTATION_MAX_RENDERBUFFER_SIZE || height > es2::IMPLEMENTATION_MAX_RENDERBUFFER_SIZE)
{
@@ -1986,7 +1986,7 @@
return egl::Image::create(width, height, format, multiSampleDepth, false);
}
-egl::Image *createDepthStencil(int width, int height, sw::Format format, int multiSampleDepth)
+NO_SANITIZE_FUNCTION egl::Image *createDepthStencil(int width, int height, sw::Format format, int multiSampleDepth)
{
if(width > es2::IMPLEMENTATION_MAX_RENDERBUFFER_SIZE || height > es2::IMPLEMENTATION_MAX_RENDERBUFFER_SIZE)
{
diff --git a/src/OpenGL/libGLESv2/VertexDataManager.cpp b/src/OpenGL/libGLESv2/VertexDataManager.cpp
index 2fff628..59062f7 100644
--- a/src/OpenGL/libGLESv2/VertexDataManager.cpp
+++ b/src/OpenGL/libGLESv2/VertexDataManager.cpp
@@ -193,6 +193,7 @@
case GL_FIXED: translated[i].type = sw::STREAMTYPE_FIXED; break;
case GL_FLOAT: translated[i].type = sw::STREAMTYPE_FLOAT; break;
case GL_HALF_FLOAT: translated[i].type = sw::STREAMTYPE_HALF; break;
+ case GL_HALF_FLOAT_OES: translated[i].type = sw::STREAMTYPE_HALF; break;
case GL_INT_2_10_10_10_REV: translated[i].type = sw::STREAMTYPE_2_10_10_10_INT; break;
case GL_UNSIGNED_INT_2_10_10_10_REV: translated[i].type = sw::STREAMTYPE_2_10_10_10_UINT; break;
default: UNREACHABLE(attrib.mType); translated[i].type = sw::STREAMTYPE_FLOAT; break;
diff --git a/src/OpenGL/libGLESv2/exports.map b/src/OpenGL/libGLESv2/exports.map
index 8238564..adc4ff5 100644
--- a/src/OpenGL/libGLESv2/exports.map
+++ b/src/OpenGL/libGLESv2/exports.map
@@ -1,172 +1,279 @@
{
global:
- glActiveTexture;
- glAttachShader;
- glBindAttribLocation;
- glBindBuffer;
- glBindFramebuffer;
- glBindRenderbuffer;
- glBindTexture;
- glBlendColor;
- glBlendEquation;
- glBlendEquationSeparate;
- glBlendFunc;
- glBlendFuncSeparate;
- glBufferData;
- glBufferSubData;
- glCheckFramebufferStatus;
- glClear;
- glClearColor;
- glClearDepthf;
- glClearStencil;
- glColorMask;
- glCompileShader;
- glCompressedTexImage2D;
- glCompressedTexSubImage2D;
- glCopyTexImage2D;
- glCopyTexSubImage2D;
- glCreateProgram;
- glCreateShader;
- glCullFace;
- glDeleteBuffers;
- glDeleteFramebuffers;
- glDeleteProgram;
- glDeleteRenderbuffers;
- glDeleteShader;
- glDeleteTextures;
- glDepthFunc;
- glDepthMask;
- glDepthRangef;
- glDetachShader;
- glDisable;
- glDisableVertexAttribArray;
- glDrawArrays;
- glDrawElements;
- glEnable;
- glEnableVertexAttribArray;
- glFinish;
- glFlush;
- glFramebufferRenderbuffer;
- glFramebufferTexture2D;
- glFrontFace;
- glGenBuffers;
- glGenFramebuffers;
- glGenRenderbuffers;
- glGenTextures;
- glGenerateMipmap;
- glGetActiveAttrib;
- glGetActiveUniform;
- glGetAttachedShaders;
- glGetAttribLocation;
- glGetBooleanv;
- glGetBufferParameteriv;
- glGetError;
- glGetFloatv;
- glGetFramebufferAttachmentParameteriv;
- glGetIntegerv;
- glGetProgramInfoLog;
- glGetProgramiv;
- glGetRenderbufferParameteriv;
- glGetShaderInfoLog;
- glGetShaderPrecisionFormat;
- glGetShaderSource;
- glGetShaderiv;
- glGetString;
- glGetTexParameterfv;
- glGetTexParameteriv;
- glGetUniformLocation;
- glGetUniformfv;
- glGetUniformiv;
- glGetVertexAttribPointerv;
- glGetVertexAttribfv;
- glGetVertexAttribiv;
- glHint;
- glIsBuffer;
- glIsEnabled;
- glIsFramebuffer;
- glIsProgram;
- glIsRenderbuffer;
- glIsShader;
- glIsTexture;
- glLineWidth;
- glLinkProgram;
- glPixelStorei;
- glPolygonOffset;
- glReadPixels;
- glReleaseShaderCompiler;
- glRenderbufferStorage;
- glSampleCoverage;
- glScissor;
- glShaderBinary;
- glShaderSource;
- glStencilFunc;
- glStencilFuncSeparate;
- glStencilMask;
- glStencilMaskSeparate;
- glStencilOp;
- glStencilOpSeparate;
- glTexImage2D;
- glTexParameterf;
- glTexParameterfv;
- glTexParameteri;
- glTexParameteriv;
- glTexSubImage2D;
- glUniform1f;
- glUniform1fv;
- glUniform1i;
- glUniform1iv;
- glUniform2f;
- glUniform2fv;
- glUniform2i;
- glUniform2iv;
- glUniform3f;
- glUniform3fv;
- glUniform3i;
- glUniform3iv;
- glUniform4f;
- glUniform4fv;
- glUniform4i;
- glUniform4iv;
- glUniformMatrix2fv;
- glUniformMatrix3fv;
- glUniformMatrix4fv;
- glUseProgram;
- glValidateProgram;
- glVertexAttrib1f;
- glVertexAttrib1fv;
- glVertexAttrib2f;
- glVertexAttrib2fv;
- glVertexAttrib3f;
- glVertexAttrib3fv;
- glVertexAttrib4f;
- glVertexAttrib4fv;
- glVertexAttribPointer;
- glViewport;
+ # OpenGL ES 2.0 core functions
+ glActiveTexture;
+ glAttachShader;
+ glBindAttribLocation;
+ glBindBuffer;
+ glBindFramebuffer;
+ glBindRenderbuffer;
+ glBindTexture;
+ glBlendColor;
+ glBlendEquation;
+ glBlendEquationSeparate;
+ glBlendFunc;
+ glBlendFuncSeparate;
+ glBufferData;
+ glBufferSubData;
+ glCheckFramebufferStatus;
+ glClear;
+ glClearColor;
+ glClearDepthf;
+ glClearStencil;
+ glColorMask;
+ glCompileShader;
+ glCompressedTexImage2D;
+ glCompressedTexSubImage2D;
+ glCopyTexImage2D;
+ glCopyTexSubImage2D;
+ glCreateProgram;
+ glCreateShader;
+ glCullFace;
+ glDeleteBuffers;
+ glDeleteFramebuffers;
+ glDeleteProgram;
+ glDeleteRenderbuffers;
+ glDeleteShader;
+ glDeleteTextures;
+ glDepthFunc;
+ glDepthMask;
+ glDepthRangef;
+ glDetachShader;
+ glDisable;
+ glDisableVertexAttribArray;
+ glDrawArrays;
+ glDrawElements;
+ glEnable;
+ glEnableVertexAttribArray;
+ glFinish;
+ glFlush;
+ glFramebufferRenderbuffer;
+ glFramebufferTexture2D;
+ glFrontFace;
+ glGenBuffers;
+ glGenFramebuffers;
+ glGenRenderbuffers;
+ glGenTextures;
+ glGenerateMipmap;
+ glGetActiveAttrib;
+ glGetActiveUniform;
+ glGetAttachedShaders;
+ glGetAttribLocation;
+ glGetBooleanv;
+ glGetBufferParameteriv;
+ glGetError;
+ glGetFloatv;
+ glGetFramebufferAttachmentParameteriv;
+ glGetIntegerv;
+ glGetProgramInfoLog;
+ glGetProgramiv;
+ glGetRenderbufferParameteriv;
+ glGetShaderInfoLog;
+ glGetShaderPrecisionFormat;
+ glGetShaderSource;
+ glGetShaderiv;
+ glGetString;
+ glGetTexParameterfv;
+ glGetTexParameteriv;
+ glGetUniformLocation;
+ glGetUniformfv;
+ glGetUniformiv;
+ glGetVertexAttribPointerv;
+ glGetVertexAttribfv;
+ glGetVertexAttribiv;
+ glHint;
+ glIsBuffer;
+ glIsEnabled;
+ glIsFramebuffer;
+ glIsProgram;
+ glIsRenderbuffer;
+ glIsShader;
+ glIsTexture;
+ glLineWidth;
+ glLinkProgram;
+ glPixelStorei;
+ glPolygonOffset;
+ glReadPixels;
+ glReleaseShaderCompiler;
+ glRenderbufferStorage;
+ glSampleCoverage;
+ glScissor;
+ glShaderBinary;
+ glShaderSource;
+ glStencilFunc;
+ glStencilFuncSeparate;
+ glStencilMask;
+ glStencilMaskSeparate;
+ glStencilOp;
+ glStencilOpSeparate;
+ glTexImage2D;
+ glTexParameterf;
+ glTexParameterfv;
+ glTexParameteri;
+ glTexParameteriv;
+ glTexSubImage2D;
+ glUniform1f;
+ glUniform1fv;
+ glUniform1i;
+ glUniform1iv;
+ glUniform2f;
+ glUniform2fv;
+ glUniform2i;
+ glUniform2iv;
+ glUniform3f;
+ glUniform3fv;
+ glUniform3i;
+ glUniform3iv;
+ glUniform4f;
+ glUniform4fv;
+ glUniform4i;
+ glUniform4iv;
+ glUniformMatrix2fv;
+ glUniformMatrix3fv;
+ glUniformMatrix4fv;
+ glUseProgram;
+ glValidateProgram;
+ glVertexAttrib1f;
+ glVertexAttrib1fv;
+ glVertexAttrib2f;
+ glVertexAttrib2fv;
+ glVertexAttrib3f;
+ glVertexAttrib3fv;
+ glVertexAttrib4f;
+ glVertexAttrib4fv;
+ glVertexAttribPointer;
+ glViewport;
- # Extensions
- glTexImage3DOES;
- glBlitFramebufferANGLE;
- glRenderbufferStorageMultisampleANGLE;
- glDeleteFencesNV;
- glFinishFenceNV;
- glGenFencesNV;
- glGetFenceivNV;
- glIsFenceNV;
- glSetFenceNV;
- glTestFenceNV;
- glGetGraphicsResetStatusEXT;
- glReadnPixelsEXT;
- glGetnUniformfvEXT;
- glGetnUniformivEXT;
- glGenQueriesEXT;
- glDeleteQueriesEXT;
- glIsQueryEXT;
- glBeginQueryEXT;
- glEndQueryEXT;
- glGetQueryivEXT;
- glGetQueryObjectuivEXT;
- glEGLImageTargetTexture2DOES;
- glEGLImageTargetRenderbufferStorageOES;
+ # OpenGL ES 3.0 core functions
+ glReadBuffer;
+ glDrawRangeElements;
+ glTexImage3D;
+ glTexSubImage3D;
+ glCopyTexSubImage3D;
+ glCompressedTexImage3D;
+ glCompressedTexSubImage3D;
+ glGenQueries;
+ glDeleteQueries;
+ glIsQuery;
+ glBeginQuery;
+ glEndQuery;
+ glGetQueryiv;
+ glGetQueryObjectuiv;
+ glUnmapBuffer;
+ glGetBufferPointerv;
+ glDrawBuffers;
+ glUniformMatrix2x3fv;
+ glUniformMatrix3x2fv;
+ glUniformMatrix2x4fv;
+ glUniformMatrix4x2fv;
+ glUniformMatrix3x4fv;
+ glUniformMatrix4x3fv;
+ glBlitFramebuffer;
+ glRenderbufferStorageMultisample;
+ glFramebufferTextureLayer;
+ glMapBufferRange;
+ glFlushMappedBufferRange;
+ glBindVertexArray;
+ glDeleteVertexArrays;
+ glGenVertexArrays;
+ glIsVertexArray;
+ glGetIntegeri_v;
+ glBeginTransformFeedback;
+ glEndTransformFeedback;
+ glBindBufferRange;
+ glBindBufferBase;
+ glTransformFeedbackVaryings;
+ glGetTransformFeedbackVarying;
+ glVertexAttribIPointer;
+ glGetVertexAttribIiv;
+ glGetVertexAttribIuiv;
+ glVertexAttribI4i;
+ glVertexAttribI4ui;
+ glVertexAttribI4iv;
+ glVertexAttribI4uiv;
+ glGetUniformuiv;
+ glGetFragDataLocation;
+ glUniform1ui;
+ glUniform2ui;
+ glUniform3ui;
+ glUniform4ui;
+ glUniform1uiv;
+ glUniform2uiv;
+ glUniform3uiv;
+ glUniform4uiv;
+ glClearBufferiv;
+ glClearBufferuiv;
+ glClearBufferfv;
+ glClearBufferfi;
+ glGetStringi;
+ glCopyBufferSubData;
+ glGetUniformIndices;
+ glGetActiveUniformsiv;
+ glGetUniformBlockIndex;
+ glGetActiveUniformBlockiv;
+ glGetActiveUniformBlockName;
+ glUniformBlockBinding;
+ glDrawArraysInstanced;
+ glDrawElementsInstanced;
+ glFenceSync;
+ glIsSync;
+ glDeleteSync;
+ glClientWaitSync;
+ glWaitSync;
+ glGetInteger64v;
+ glGetSynciv;
+ glGetInteger64i_v;
+ glGetBufferParameteri64v;
+ glGenSamplers;
+ glDeleteSamplers;
+ glIsSampler;
+ glBindSampler;
+ glSamplerParameteri;
+ glSamplerParameteriv;
+ glSamplerParameterf;
+ glSamplerParameterfv;
+ glGetSamplerParameteriv;
+ glGetSamplerParameterfv;
+ glVertexAttribDivisor;
+ glBindTransformFeedback;
+ glDeleteTransformFeedbacks;
+ glGenTransformFeedbacks;
+ glIsTransformFeedback;
+ glPauseTransformFeedback;
+ glResumeTransformFeedback;
+ glGetProgramBinary;
+ glProgramBinary;
+ glProgramParameteri;
+ glInvalidateFramebuffer;
+ glInvalidateSubFramebuffer;
+ glTexStorage2D;
+ glTexStorage3D;
+ glGetInternalformativ;
+
+ # Extensions
+ glTexImage3DOES;
+ glBlitFramebufferANGLE;
+ glRenderbufferStorageMultisampleANGLE;
+ glDeleteFencesNV;
+ glFinishFenceNV;
+ glGenFencesNV;
+ glGetFenceivNV;
+ glIsFenceNV;
+ glSetFenceNV;
+ glTestFenceNV;
+ glGetGraphicsResetStatusEXT;
+ glReadnPixelsEXT;
+ glGetnUniformfvEXT;
+ glGetnUniformivEXT;
+ glGenQueriesEXT;
+ glDeleteQueriesEXT;
+ glIsQueryEXT;
+ glBeginQueryEXT;
+ glEndQueryEXT;
+ glGetQueryivEXT;
+ glGetQueryObjectuivEXT;
+ glEGLImageTargetTexture2DOES;
+ glEGLImageTargetRenderbufferStorageOES;
glIsRenderbufferOES;
glBindRenderbufferOES;
glDeleteRenderbuffersOES;
@@ -184,117 +291,15 @@
glGenerateMipmapOES;
glDrawBuffersEXT;
- # GLES 3.0 Functions
- glReadBuffer;
- glDrawRangeElements;
- glTexImage3D;
- glTexSubImage3D;
- glCopyTexSubImage3D;
- glCompressedTexImage3D;
- glCompressedTexSubImage3D;
- glGenQueries;
- glDeleteQueries;
- glIsQuery;
- glBeginQuery;
- glEndQuery;
- glGetQueryiv;
- glGetQueryObjectuiv;
- glUnmapBuffer;
- glGetBufferPointerv;
- glDrawBuffers;
- glUniformMatrix2x3fv;
- glUniformMatrix3x2fv;
- glUniformMatrix2x4fv;
- glUniformMatrix4x2fv;
- glUniformMatrix3x4fv;
- glUniformMatrix4x3fv;
- glBlitFramebuffer;
- glRenderbufferStorageMultisample;
- glFramebufferTextureLayer;
- glMapBufferRange;
- glFlushMappedBufferRange;
- glBindVertexArray;
- glDeleteVertexArrays;
- glGenVertexArrays;
- glIsVertexArray;
- glGetIntegeri_v;
- glBeginTransformFeedback;
- glEndTransformFeedback;
- glBindBufferRange;
- glBindBufferBase;
- glTransformFeedbackVaryings;
- glGetTransformFeedbackVarying;
- glVertexAttribIPointer;
- glGetVertexAttribIiv;
- glGetVertexAttribIuiv;
- glVertexAttribI4i;
- glVertexAttribI4ui;
- glVertexAttribI4iv;
- glVertexAttribI4uiv;
- glGetUniformuiv;
- glGetFragDataLocation;
- glUniform1ui;
- glUniform2ui;
- glUniform3ui;
- glUniform4ui;
- glUniform1uiv;
- glUniform2uiv;
- glUniform3uiv;
- glUniform4uiv;
- glClearBufferiv;
- glClearBufferuiv;
- glClearBufferfv;
- glClearBufferfi;
- glGetStringi;
- glCopyBufferSubData;
- glGetUniformIndices;
- glGetActiveUniformsiv;
- glGetUniformBlockIndex;
- glGetActiveUniformBlockiv;
- glGetActiveUniformBlockName;
- glUniformBlockBinding;
- glDrawArraysInstanced;
- glDrawElementsInstanced;
- glFenceSync;
- glIsSync;
- glDeleteSync;
- glClientWaitSync;
- glWaitSync;
- glGetInteger64v;
- glGetSynciv;
- glGetInteger64i_v;
- glGetBufferParameteri64v;
- glGenSamplers;
- glDeleteSamplers;
- glIsSampler;
- glBindSampler;
- glSamplerParameteri;
- glSamplerParameteriv;
- glSamplerParameterf;
- glSamplerParameterfv;
- glGetSamplerParameteriv;
- glGetSamplerParameterfv;
- glVertexAttribDivisor;
- glBindTransformFeedback;
- glDeleteTransformFeedbacks;
- glGenTransformFeedbacks;
- glIsTransformFeedback;
- glPauseTransformFeedback;
- glResumeTransformFeedback;
- glGetProgramBinary;
- glProgramBinary;
- glProgramParameteri;
- glInvalidateFramebuffer;
- glInvalidateSubFramebuffer;
- glTexStorage2D;
- glTexStorage3D;
- glGetInternalformativ;
+ # Table of function pointers to disambiguate between libraries
+ libGLESv2_swiftshader;
- libGLESv2_swiftshader;
+ # Type-strings and type-infos required by sanitizers
+ _ZTS*;
+ _ZTI*;
- Register;
+ Register;
local:
- *;
+ *;
};
-
diff --git a/src/OpenGL/libGLESv2/libGLESv2.cpp b/src/OpenGL/libGLESv2/libGLESv2.cpp
index 8be9056..f486982 100644
--- a/src/OpenGL/libGLESv2/libGLESv2.cpp
+++ b/src/OpenGL/libGLESv2/libGLESv2.cpp
@@ -4247,6 +4247,9 @@
case GL_FRAGMENT_SHADER_DERIVATIVE_HINT_OES:
if(context) context->setFragmentShaderDerivativeHint(mode);
break;
+ case GL_TEXTURE_FILTERING_HINT_CHROMIUM:
+ if(context) context->setTextureFilteringHint(mode);
+ break;
default:
return error(GL_INVALID_ENUM);
}
@@ -5970,7 +5973,7 @@
}
}
- programObject->validate();
+ programObject->validate(context->getDevice());
}
}
@@ -6143,6 +6146,7 @@
case GL_UNSIGNED_SHORT:
case GL_FIXED:
case GL_FLOAT:
+ case GL_HALF_FLOAT_OES: // GL_OES_vertex_half_float
break;
case GL_INT_2_10_10_10_REV:
case GL_UNSIGNED_INT_2_10_10_10_REV:
@@ -6852,7 +6856,7 @@
}
-extern "C" __eglMustCastToProperFunctionPointerType es2GetProcAddress(const char *procname)
+extern "C" NO_SANITIZE_FUNCTION __eglMustCastToProperFunctionPointerType es2GetProcAddress(const char *procname)
{
struct Extension
{
diff --git a/src/OpenGL/libGLESv2/libGLESv2.vcxproj b/src/OpenGL/libGLESv2/libGLESv2.vcxproj
index bbb16a9..bae560e 100644
--- a/src/OpenGL/libGLESv2/libGLESv2.vcxproj
+++ b/src/OpenGL/libGLESv2/libGLESv2.vcxproj
@@ -125,7 +125,7 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>$(SolutionDir)\src;$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;_DEBUG;_WINDOWS;_USRDLL;LIBGLESV2_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;_DEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
@@ -156,7 +156,7 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>$(SolutionDir)\src;$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;_DEBUG;_WINDOWS;_USRDLL;LIBGLESV2_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;_DEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeader>
@@ -189,7 +189,7 @@
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBGLESV2_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -227,7 +227,7 @@
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..;$(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBGLESV2_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -267,7 +267,7 @@
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..; $(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBGLESV2_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -303,7 +303,7 @@
<Optimization>Full</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>$(ProjectDir)/..; $(ProjectDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NDEBUG;_WINDOWS;_USRDLL;LIBGLESV2_EXPORTS;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;GL_API=;GL_APICALL=;GL_GLEXT_PROTOTYPES;NO_SANITIZE_FUNCTION=;NDEBUG;_WINDOWS;_USRDLL;_CRT_SECURE_NO_DEPRECATE;NOMINMAX;_SECURE_SCL=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
diff --git a/src/OpenGL/libGLESv2/utilities.cpp b/src/OpenGL/libGLESv2/utilities.cpp
index d5b2b28..4257b69 100644
--- a/src/OpenGL/libGLESv2/utilities.cpp
+++ b/src/OpenGL/libGLESv2/utilities.cpp
@@ -1087,6 +1087,7 @@
case GL_RGBA16I:
case GL_RGBA32I:
case GL_RGBA32UI:
+ case GL_R11F_G11F_B10F:
return clientVersion >= 3;
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32_OES:
diff --git a/src/Reactor/BUILD.gn b/src/Reactor/BUILD.gn
index 5fa683e..4bc15c5 100644
--- a/src/Reactor/BUILD.gn
+++ b/src/Reactor/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../swiftshader.gni")
+
declare_args() {
# Currently, Subzero is not used by default
# LLVM is still the default backend
@@ -73,6 +75,7 @@
"/wd4146",
"/wd4245", # conversion from int to unsigned int (llvm)
"/wd4267",
+ "/wd4291",
"/wd4310",
"/wd4334",
"/wd4389",
@@ -110,6 +113,8 @@
"/wd4146",
"/wd4245", # conversion from int to unsigned int (llvm)
"/wd4267",
+ "/wd4291",
+ "/wd4309",
"/wd4702",
"/wd4800",
]
@@ -146,7 +151,6 @@
]
defines = [
- "LOG_TAG=\"swiftshader_reactor\"",
"__STDC_CONSTANT_MACROS",
"__STDC_LIMIT_MACROS",
]
@@ -154,7 +158,7 @@
}
if (use_swiftshader_with_subzero) {
- source_set("swiftshader_subzero") {
+ swiftshader_source_set("swiftshader_subzero") {
subzero_dir = "../../third_party/subzero"
subzero_llvm_dir = "../../third_party/llvm-subzero"
@@ -241,17 +245,14 @@
]
}
- if (is_win) {
- configs -= [ "//build/config/win:unicode" ]
- }
- configs += [
+ configs = [
":swiftshader_subzero_common_private_config",
":swiftshader_subzero_private_config",
]
}
}
-source_set("swiftshader_reactor") {
+swiftshader_source_set("swiftshader_reactor") {
deps = [
"../OpenGL/common:swiftshader_opengl_common",
]
@@ -268,7 +269,7 @@
"SubzeroReactor.cpp",
]
- configs += [
+ configs = [
":swiftshader_subzero_common_private_config",
":swiftshader_reactor_with_subzero_private_config",
]
@@ -281,7 +282,7 @@
"LLVMRoutineManager.cpp",
]
- configs += [ ":swiftshader_reactor_private_config" ]
+ configs = [ ":swiftshader_reactor_private_config" ]
include_dirs = [
"..",
@@ -289,8 +290,4 @@
"../../third_party/LLVM/include/",
]
}
-
- if (is_win) {
- configs -= [ "//build/config/win:unicode" ]
- }
}
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index dda5c04..c66e5a5 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -76,20 +76,54 @@
namespace sw
{
- using namespace llvm;
-
Optimization optimization[10] = {InstructionCombining, Disabled};
- class Type : public llvm::Type {};
+ enum EmulatedType // Identifiers for short vector types that T(Type*) implements with wider 128-bit vector types.
+ {
+ Type_v2i32, // Backed by Int4's type in T(Type*); typeSize() reports 8 bytes.
+ Type_v4i16, // Backed by Short8's type in T(Type*); typeSize() reports 8 bytes.
+ Type_v2i16, // Backed by Short8's type in T(Type*); typeSize() reports 4 bytes.
+ Type_v8i8, // Backed by Byte16's type in T(Type*); typeSize() reports 8 bytes.
+ Type_v4i8, // Backed by Byte16's type in T(Type*); typeSize() reports 4 bytes.
+ Type_v2f32, // Backed by Float4's type in T(Type*); typeSize() reports 8 bytes.
+ EmulatedTypeCount // Number of enumerators above; T(Type*) and typeSize() treat handle values below it as emulated types.
+ };
+
class Value : public llvm::Value {};
class SwitchCases : public llvm::SwitchInst {};
class BasicBlock : public llvm::BasicBlock {};
+ llvm::Type *T(Type *t) // Resolves a Reactor Type handle to the llvm::Type used to implement it.
+ {
+ uintptr_t type = reinterpret_cast<uintptr_t>(t); // Look at the handle's numeric value rather than dereferencing it.
+ if(type < EmulatedTypeCount) // Such a value is an EmulatedType enumerator (see T(EmulatedType)), not a real pointer.
+ {
+ // Use 128-bit vectors to implement logically shorter ones.
+ switch(type)
+ {
+ case Type_v2i32: return T(Int4::getType());
+ case Type_v4i16: return T(Short8::getType());
+ case Type_v2i16: return T(Short8::getType());
+ case Type_v8i8: return T(Byte16::getType());
+ case Type_v4i8: return T(Byte16::getType());
+ case Type_v2f32: return T(Float4::getType());
+ default: assert(false); // Not expected: every enumerator below EmulatedTypeCount has a case above.
+ }
+ }
+
+ return reinterpret_cast<llvm::Type*>(t); // Any other handle is a reinterpreted llvm::Type pointer, as produced by T(llvm::Type*).
+ }
+
inline Type *T(llvm::Type *t)
{
return reinterpret_cast<Type*>(t);
}
+ Type *T(EmulatedType t) // Wraps an EmulatedType enumerator in a Reactor Type handle.
+ {
+ return reinterpret_cast<Type*>(t); // The enumerator value is stored in the pointer itself; T(Type*) and typeSize() decode it by comparing against EmulatedTypeCount.
+ }
+
inline Value *V(llvm::Value *t)
{
return reinterpret_cast<Value*>(t);
@@ -105,19 +139,39 @@
return reinterpret_cast<BasicBlock*>(t);
}
+ static size_t typeSize(Type *type) // Returns the size in bytes of the type identified by the given Reactor Type handle.
+ {
+ uintptr_t t = reinterpret_cast<uintptr_t>(type); // Numeric value of the handle, used to detect emulated types.
+ if(t < EmulatedTypeCount) // Emulated type: report its logical size, not the size of the 128-bit vector that T(Type*) substitutes for it.
+ {
+ switch(t)
+ {
+ case Type_v2i32: return 8;
+ case Type_v4i16: return 8;
+ case Type_v2i16: return 4;
+ case Type_v8i8: return 8;
+ case Type_v4i8: return 4;
+ case Type_v2f32: return 8;
+ default: assert(false); // Not expected: every enumerator below EmulatedTypeCount has a case above.
+ }
+ }
+
+ return T(type)->getPrimitiveSizeInBits() / 8; // Real LLVM type: convert its primitive size from bits to bytes.
+ }
+
Nucleus::Nucleus()
{
::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
- InitializeNativeTarget();
- JITEmitDebugInfo = false;
+ llvm::InitializeNativeTarget();
+ llvm::JITEmitDebugInfo = false;
if(!::context)
{
- ::context = new LLVMContext();
+ ::context = new llvm::LLVMContext();
}
- ::module = new Module("", *::context);
+ ::module = new llvm::Module("", *::context);
::routineManager = new LLVMRoutineManager();
#if defined(__x86_64__)
@@ -126,7 +180,7 @@
const char *architecture = "x86";
#endif
- SmallVector<std::string, 1> MAttrs;
+ llvm::SmallVector<std::string, 1> MAttrs;
MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
@@ -136,12 +190,12 @@
MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
std::string error;
- TargetMachine *targetMachine = EngineBuilder::selectTarget(::module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error);
- ::executionEngine = JIT::createJIT(::module, 0, ::routineManager, CodeGenOpt::Aggressive, true, targetMachine);
+ llvm::TargetMachine *targetMachine = llvm::EngineBuilder::selectTarget(::module, architecture, "", MAttrs, llvm::Reloc::Default, llvm::CodeModel::JITDefault, &error);
+ ::executionEngine = llvm::JIT::createJIT(::module, 0, ::routineManager, llvm::CodeGenOpt::Aggressive, true, targetMachine);
if(!::builder)
{
- ::builder = new IRBuilder<>(*::context);
+ ::builder = new llvm::IRBuilder<>(*::context);
#if defined(_WIN32)
HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
@@ -181,14 +235,14 @@
}
else
{
- createRet(V(UndefValue::get(type)));
+ createRet(V(llvm::UndefValue::get(type)));
}
}
if(false)
{
std::string error;
- raw_fd_ostream file("llvm-dump-unopt.txt", error);
+ llvm::raw_fd_ostream file("llvm-dump-unopt.txt", error);
::module->print(file, 0);
}
@@ -200,7 +254,7 @@
if(false)
{
std::string error;
- raw_fd_ostream file("llvm-dump-opt.txt", error);
+ llvm::raw_fd_ostream file("llvm-dump-opt.txt", error);
::module->print(file, 0);
}
@@ -217,33 +271,33 @@
void Nucleus::optimize()
{
- static PassManager *passManager = nullptr;
+ static llvm::PassManager *passManager = nullptr;
if(!passManager)
{
- passManager = new PassManager();
+ passManager = new llvm::PassManager();
- UnsafeFPMath = true;
- // NoInfsFPMath = true;
- // NoNaNsFPMath = true;
+ llvm::UnsafeFPMath = true;
+ // llvm::NoInfsFPMath = true;
+ // llvm::NoNaNsFPMath = true;
- passManager->add(new TargetData(*::executionEngine->getTargetData()));
- passManager->add(createScalarReplAggregatesPass());
+ passManager->add(new llvm::TargetData(*::executionEngine->getTargetData()));
+ passManager->add(llvm::createScalarReplAggregatesPass());
for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
{
switch(optimization[pass])
{
- case Disabled: break;
- case CFGSimplification: passManager->add(createCFGSimplificationPass()); break;
- case LICM: passManager->add(createLICMPass()); break;
- case AggressiveDCE: passManager->add(createAggressiveDCEPass()); break;
- case GVN: passManager->add(createGVNPass()); break;
- case InstructionCombining: passManager->add(createInstructionCombiningPass()); break;
- case Reassociate: passManager->add(createReassociatePass()); break;
- case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break;
- case SCCP: passManager->add(createSCCPPass()); break;
- case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break;
+ case Disabled: break;
+ case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
+ case LICM: passManager->add(llvm::createLICMPass()); break;
+ case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
+ case GVN: passManager->add(llvm::createGVNPass()); break;
+ case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
+ case Reassociate: passManager->add(llvm::createReassociatePass()); break;
+ case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
+ case SCCP: passManager->add(llvm::createSCCPPass()); break;
+ case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break;
default:
assert(false);
}
@@ -258,15 +312,15 @@
// Need to allocate it in the entry block for mem2reg to work
llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
- Instruction *declaration;
+ llvm::Instruction *declaration;
if(arraySize)
{
- declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize));
+ declaration = new llvm::AllocaInst(T(type), Nucleus::createConstantInt(arraySize));
}
else
{
- declaration = new AllocaInst(type, (Value*)0);
+ declaration = new llvm::AllocaInst(T(type), (Value*)nullptr);
}
entryBlock.getInstList().push_front(declaration);
@@ -292,7 +346,7 @@
void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
{
- llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, T(Params), false);
+ llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
::function->setCallingConv(llvm::CallingConv::C);
@@ -314,15 +368,11 @@
void Nucleus::createRetVoid()
{
- x86::emms();
-
::builder->CreateRetVoid();
}
void Nucleus::createRet(Value *v)
{
- x86::emms();
-
::builder->CreateRet(v);
}
@@ -441,73 +491,154 @@
return V(::builder->CreateNot(v));
}
- Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
+ Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
{
- assert(ptr->getType()->getContainedType(0) == type);
- return V(::builder->Insert(new LoadInst(ptr, "", isVolatile, align)));
+ uintptr_t t = reinterpret_cast<uintptr_t>(type);
+ if(t < EmulatedTypeCount)
+ {
+ switch(t)
+ {
+ case Type_v2i32:
+ case Type_v4i16:
+ case Type_v8i8:
+ case Type_v2f32:
+ return createBitCast(createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))), createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment), 0), T(T(type)));
+ case Type_v2i16:
+ case Type_v4i8:
+ if(alignment != 0) // Not a local variable (all vectors are 128-bit).
+ {
+ Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
+ Value *i = V(createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment));
+ i = createZExt(i, Long::getType());
+ Value *v = V(createInsertElement(u, i, 0));
+ return createBitCast(v, T(T(type)));
+ }
+ break;
+ default:
+ assert(false);
+ }
+ }
+
+ assert(ptr->getType()->getContainedType(0) == T(type));
+ return V(::builder->Insert(new llvm::LoadInst(ptr, "", isVolatile, alignment)));
}
- Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
+ Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
{
- assert(ptr->getType()->getContainedType(0) == type);
- ::builder->Insert(new StoreInst(value, ptr, isVolatile, align));
+ uintptr_t t = reinterpret_cast<uintptr_t>(type);
+ if(t < EmulatedTypeCount)
+ {
+ switch(t)
+ {
+ case Type_v2i32:
+ case Type_v4i16:
+ case Type_v8i8:
+ case Type_v2f32:
+ createStore(createExtractElement(createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0), createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment);
+ return value;
+ case Type_v2i16:
+ case Type_v4i8:
+ if(alignment != 0) // Not a local variable (all vectors are 128-bit).
+ {
+ createStore(createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0), createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment);
+ return value;
+ }
+ break;
+ default:
+ assert(false);
+ }
+ }
+
+ assert(ptr->getType()->getContainedType(0) == T(type));
+ ::builder->Insert(new llvm::StoreInst(value, ptr, isVolatile, alignment));
return value;
}
Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
{
- if(unsignedIndex && sizeof(void*) == 8)
+ if(sizeof(void*) == 8)
{
- index = createZExt(index, Long::getType());
+ if(unsignedIndex)
+ {
+ index = createZExt(index, Long::getType());
+ }
+ else
+ {
+ index = createSExt(index, Long::getType());
+ }
+
+ index = createMul(index, createConstantLong((int64_t)typeSize(type)));
+ }
+ else
+ {
+ index = createMul(index, createConstantInt((int)typeSize(type)));
}
- assert(ptr->getType()->getContainedType(0) == type);
- return V(::builder->CreateGEP(ptr, index));
+ assert(ptr->getType()->getContainedType(0) == T(type));
+ return createBitCast(V(::builder->CreateGEP(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0))), index)), T(llvm::PointerType::get(T(type), 0)));
}
Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
{
- return V(::builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent));
+ return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, ptr, value, llvm::SequentiallyConsistent));
}
Value *Nucleus::createTrunc(Value *v, Type *destType)
{
- return V(::builder->CreateTrunc(v, destType));
+ return V(::builder->CreateTrunc(v, T(destType)));
}
Value *Nucleus::createZExt(Value *v, Type *destType)
{
- return V(::builder->CreateZExt(v, destType));
+ return V(::builder->CreateZExt(v, T(destType)));
}
Value *Nucleus::createSExt(Value *v, Type *destType)
{
- return V(::builder->CreateSExt(v, destType));
+ return V(::builder->CreateSExt(v, T(destType)));
}
Value *Nucleus::createFPToSI(Value *v, Type *destType)
{
- return V(::builder->CreateFPToSI(v, destType));
+ return V(::builder->CreateFPToSI(v, T(destType)));
}
Value *Nucleus::createSIToFP(Value *v, Type *destType)
{
- return V(::builder->CreateSIToFP(v, destType));
+ return V(::builder->CreateSIToFP(v, T(destType)));
}
Value *Nucleus::createFPTrunc(Value *v, Type *destType)
{
- return V(::builder->CreateFPTrunc(v, destType));
+ return V(::builder->CreateFPTrunc(v, T(destType)));
}
Value *Nucleus::createFPExt(Value *v, Type *destType)
{
- return V(::builder->CreateFPExt(v, destType));
+ return V(::builder->CreateFPExt(v, T(destType)));
}
Value *Nucleus::createBitCast(Value *v, Type *destType)
{
- return V(::builder->CreateBitCast(v, destType));
+ // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
+ // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
+ // reading back as the destination type.
+ if(!v->getType()->isVectorTy() && T(destType)->isVectorTy())
+ {
+ Value *readAddress = allocateStackVariable(destType);
+ Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(v->getType(), 0)));
+ createStore(v, writeAddress, T(v->getType()));
+ return createLoad(readAddress, destType);
+ }
+ else if(v->getType()->isVectorTy() && !T(destType)->isVectorTy())
+ {
+ Value *writeAddress = allocateStackVariable(T(v->getType()));
+ createStore(v, writeAddress, T(v->getType()));
+ Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
+ return createLoad(readAddress, destType);
+ }
+
+ return V(::builder->CreateBitCast(v, T(destType)));
}
Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
@@ -632,7 +763,7 @@
Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
{
- assert(vector->getType()->getContainedType(0) == type);
+ assert(vector->getType()->getContainedType(0) == T(type));
return V(::builder->CreateExtractElement(vector, createConstantInt(index)));
}
@@ -650,7 +781,7 @@
for(int i = 0; i < size; i++)
{
- swizzle[i] = llvm::ConstantInt::get(Type::getInt32Ty(*::context), select[i]);
+ swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
}
llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
@@ -670,7 +801,7 @@
void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
{
- switchCases->addCase(llvm::ConstantInt::get(Type::getInt32Ty(*::context), label, true), branch);
+ switchCases->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), branch);
}
void Nucleus::createUnreachable()
@@ -713,74 +844,74 @@
Type *Nucleus::getPointerType(Type *ElementType)
{
- return T(llvm::PointerType::get(ElementType, 0));
+ return T(llvm::PointerType::get(T(ElementType), 0));
}
Value *Nucleus::createNullValue(Type *Ty)
{
- return V(llvm::Constant::getNullValue(Ty));
+ return V(llvm::Constant::getNullValue(T(Ty)));
}
Value *Nucleus::createConstantLong(int64_t i)
{
- return V(llvm::ConstantInt::get(Type::getInt64Ty(*::context), i, true));
+ return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
}
Value *Nucleus::createConstantInt(int i)
{
- return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, true));
+ return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
}
Value *Nucleus::createConstantInt(unsigned int i)
{
- return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, false));
+ return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
}
Value *Nucleus::createConstantBool(bool b)
{
- return V(llvm::ConstantInt::get(Type::getInt1Ty(*::context), b));
+ return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
}
Value *Nucleus::createConstantByte(signed char i)
{
- return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, true));
+ return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
}
Value *Nucleus::createConstantByte(unsigned char i)
{
- return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, false));
+ return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
}
Value *Nucleus::createConstantShort(short i)
{
- return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, true));
+ return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
}
Value *Nucleus::createConstantShort(unsigned short i)
{
- return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, false));
+ return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
}
Value *Nucleus::createConstantFloat(float x)
{
- return V(llvm::ConstantFP::get(Float::getType(), x));
+ return V(llvm::ConstantFP::get(T(Float::getType()), x));
}
Value *Nucleus::createNullPointer(Type *Ty)
{
- return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0)));
+ return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
}
Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
{
- assert(llvm::isa<VectorType>(type));
- const int numConstants = llvm::cast<VectorType>(type)->getNumElements();
+ assert(llvm::isa<llvm::VectorType>(T(type)));
+ const int numConstants = llvm::cast<llvm::VectorType>(T(type))->getNumElements();
assert(numConstants <= 16);
llvm::Constant *constantVector[16];
for(int i = 0; i < numConstants; i++)
{
- constantVector[i] = llvm::ConstantInt::get(type->getContainedType(0), constants[i]);
+ constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i]);
}
return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
@@ -788,14 +919,14 @@
Value *Nucleus::createConstantVector(const double *constants, Type *type)
{
- assert(llvm::isa<VectorType>(type));
- const int numConstants = llvm::cast<VectorType>(type)->getNumElements();
+ assert(llvm::isa<llvm::VectorType>(T(type)));
+ const int numConstants = llvm::cast<llvm::VectorType>(T(type))->getNumElements();
assert(numConstants <= 8);
llvm::Constant *constantVector[8];
for(int i = 0; i < numConstants; i++)
{
- constantVector[i] = llvm::ConstantFP::get(type->getContainedType(0), constants[i]);
+ constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i]);
}
return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
@@ -806,17 +937,6 @@
return T(llvm::Type::getVoidTy(*::context));
}
- class MMX : public LValue<MMX>
- {
- public:
- static Type *getType();
- };
-
- Type *MMX::getType()
- {
- return T(llvm::Type::getX86_MMXTy(*::context));
- }
-
Bool::Bool(Argument<Bool> argument)
{
storeValue(argument.value);
@@ -1894,7 +2014,7 @@
Byte4::Byte4(RValue<Byte8> cast)
{
- storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), Int::getType()));
+ storeValue(Nucleus::createBitCast(cast.value, getType()));
}
Byte4::Byte4(const Reference<Byte4> &rhs)
@@ -1905,28 +2025,18 @@
Type *Byte4::getType()
{
- #if 0
- return T(VectorType::get(Byte::getType(), 4));
- #else
- return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
- #endif
+ return T(Type_v4i8);
}
Type *SByte4::getType()
{
- #if 0
- return T(VectorType::get(SByte::getType(), 4));
- #else
- return Int::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
- #endif
+ return T(Type_v4i8);
}
Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
{
int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
- Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Byte::getType(), 8))));
-
- storeValue(Nucleus::createBitCast(vector, getType()));
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
Byte8::Byte8(RValue<Byte8> rhs)
@@ -1971,26 +2081,12 @@
RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::paddb(lhs, rhs);
- }
- else
- {
- return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
- }
+ return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
}
RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::psubb(lhs, rhs);
- }
- else
- {
- return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
- }
+ return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
}
// RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
@@ -2010,38 +2106,17 @@
RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
- }
+ return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
}
RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
- }
+ return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
}
RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
- }
+ return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
}
// RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
@@ -2116,14 +2191,7 @@
RValue<Byte8> operator~(RValue<Byte8> val)
{
- if(CPUID::supportsMMX2())
- {
- return val ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
- }
- else
- {
- return RValue<Byte8>(Nucleus::createNot(val.value));
- }
+ return RValue<Byte8>(Nucleus::createNot(val.value));
}
RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
@@ -2138,48 +2206,26 @@
RValue<Short4> Unpack(RValue<Byte4> x)
{
- Value *int2 = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
- Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType());
-
- return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
+ int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; // Real type is v16i8
+ return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
{
- Value *xx = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
- Value *yy = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), y.value, 0);
-
- return UnpackLow(As<Byte8>(xx), As<Byte8>(yy));
+ return UnpackLow(As<Byte8>(x), As<Byte8>(y));
}
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
{
- if(CPUID::supportsMMX2())
- {
- return x86::punpcklbw(x, y);
- }
- else
- {
- int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
- Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
-
- return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
- }
+ int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
+ return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
{
- if(CPUID::supportsMMX2())
- {
- return x86::punpckhbw(x, y);
- }
- else
- {
- int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
- Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
-
- return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
- }
+ int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
+ auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
+ return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
}
RValue<Int> SignMask(RValue<Byte8> x)
@@ -2199,20 +2245,13 @@
Type *Byte8::getType()
{
- if(CPUID::supportsMMX2())
- {
- return MMX::getType();
- }
- else
- {
- return T(VectorType::get(Byte::getType(), 8));
- }
+ return T(Type_v8i8);
}
SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
{
int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
- Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(SByte::getType(), 8))));
+ Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
storeValue(Nucleus::createBitCast(vector, getType()));
}
@@ -2259,26 +2298,12 @@
RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs)));
- }
- else
- {
- return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
- }
+ return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
}
RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs)));
- }
- else
- {
- return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
- }
+ return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
}
// RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
@@ -2383,14 +2408,7 @@
RValue<SByte8> operator~(RValue<SByte8> val)
{
- if(CPUID::supportsMMX2())
- {
- return val ^ SByte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
- }
- else
- {
- return RValue<SByte8>(Nucleus::createNot(val.value));
- }
+ return RValue<SByte8>(Nucleus::createNot(val.value));
}
RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
@@ -2405,32 +2423,15 @@
RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
{
- if(CPUID::supportsMMX2())
- {
- return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y)));
- }
- else
- {
- int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
- Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
-
- return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
- }
+ int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
+ return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
{
- if(CPUID::supportsMMX2())
- {
- return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y)));
- }
- else
- {
- int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
- Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
-
- return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
- }
+ int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
+ auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
+ return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
}
RValue<Int> SignMask(RValue<SByte8> x)
@@ -2450,14 +2451,7 @@
Type *SByte8::getType()
{
- if(CPUID::supportsMMX2())
- {
- return MMX::getType();
- }
- else
- {
- return T(VectorType::get(SByte::getType(), 8));
- }
+ return T(Type_v8i8);
}
Byte16::Byte16(RValue<Byte16> rhs)
@@ -2502,92 +2496,51 @@
Type *Byte16::getType()
{
- return T(VectorType::get(Byte::getType(), 16));
+ return T(llvm::VectorType::get(T(Byte::getType()), 16));
}
Type *SByte16::getType()
{
- return T( VectorType::get(SByte::getType(), 16));
+ return T(llvm::VectorType::get(T(SByte::getType()), 16));
}
Short2::Short2(RValue<Short4> cast)
{
- storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
+ storeValue(Nucleus::createBitCast(cast.value, getType()));
}
Type *Short2::getType()
{
- #if 0
- return T(VectorType::get(Short::getType(), 2));
- #else
- return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
- #endif
+ return T(Type_v2i16);
}
UShort2::UShort2(RValue<UShort4> cast)
{
- storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
+ storeValue(Nucleus::createBitCast(cast.value, getType()));
}
Type *UShort2::getType()
{
- #if 0
- return T(VectorType::get(UShort::getType(), 2));
- #else
- return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
- #endif
+ return T(Type_v2i16);
}
Short4::Short4(RValue<Int> cast)
{
- Value *extend = Nucleus::createZExt(cast.value, Long::getType());
- Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value;
+ Value *vector = loadValue();
+ Value *element = Nucleus::createTrunc(cast.value, Short::getType());
+ Value *insert = Nucleus::createInsertElement(vector, element, 0);
+ Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
storeValue(swizzle);
}
Short4::Short4(RValue<Int4> cast)
{
+ int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
- #if 0 // FIXME: Check codegen (pshuflw phshufhw pshufd)
- Constant *pack[8];
- pack[0] = Nucleus::createConstantInt(0);
- pack[1] = Nucleus::createConstantInt(2);
- pack[2] = Nucleus::createConstantInt(4);
- pack[3] = Nucleus::createConstantInt(6);
-
- Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
- #else
- Value *packed;
-
- // FIXME: Use Swizzle<Short8>
- if(!CPUID::supportsSSSE3())
- {
- int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7};
- int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6};
-
- Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw);
- Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw);
- Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
- packed = createSwizzle4(int4, 0x88);
- }
- else
- {
- int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
- Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
- packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
- }
-
- #if 0 // FIXME: No optimal instruction selection
- Value *qword2 = Nucleus::createBitCast(packed, T(VectorType::get(Long::getType(), 2)));
- Value *element = Nucleus::createExtractElement(qword2, 0);
- Value *short4 = Nucleus::createBitCast(element, Short4::getType());
- #else // FIXME: Requires SSE
- Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
- Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
- #endif
- #endif
+ Value *packed = Nucleus::createShuffleVector(short8, short8, select);
+ Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
storeValue(short4);
}
@@ -2607,17 +2560,13 @@
Short4::Short4(short xyzw)
{
int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
- Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4))));
-
- storeValue(Nucleus::createBitCast(vector, getType()));
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
Short4::Short4(short x, short y, short z, short w)
{
int64_t constantVector[4] = {x, y, z, w};
- Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4))));
-
- storeValue(Nucleus::createBitCast(vector, getType()));
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
Short4::Short4(RValue<Short4> rhs)
@@ -2700,38 +2649,17 @@
RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::paddw(lhs, rhs);
- }
- else
- {
- return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
- }
+ return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
}
RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::psubw(lhs, rhs);
- }
- else
- {
- return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
- }
+ return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
}
RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::pmullw(lhs, rhs);
- }
- else
- {
- return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
- }
+ return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
}
// RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
@@ -2746,38 +2674,17 @@
RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::pand(lhs, rhs);
- }
- else
- {
- return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
- }
+ return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
}
RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::por(lhs, rhs);
- }
- else
- {
- return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
- }
+ return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
}
RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::pxor(lhs, rhs);
- }
- else
- {
- return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
- }
+ return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
}
RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
@@ -2851,34 +2758,18 @@
RValue<Short4> operator-(RValue<Short4> val)
{
- if(CPUID::supportsMMX2())
- {
- return Short4(0, 0, 0, 0) - val;
- }
- else
- {
- return RValue<Short4>(Nucleus::createNeg(val.value));
- }
+ return RValue<Short4>(Nucleus::createNeg(val.value));
}
RValue<Short4> operator~(RValue<Short4> val)
{
- if(CPUID::supportsMMX2())
- {
- return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu);
- }
- else
- {
- return RValue<Short4>(Nucleus::createNot(val.value));
- }
+ return RValue<Short4>(Nucleus::createNot(val.value));
}
RValue<Short4> RoundShort4(RValue<Float4> cast)
{
- RValue<Int4> v4i32 = x86::cvtps2dq(cast);
- RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32);
-
- return As<Short4>(Int2(As<Int4>(v8i16)));
+ RValue<Int4> int4 = RoundInt(cast);
+ return As<Short4>(Pack(int4, int4));
}
RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
@@ -2913,73 +2804,50 @@
RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
{
- return x86::packsswb(x, y);
+ auto result = x86::packsswb(x, y);
+
+ return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
}
RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
{
- if(CPUID::supportsMMX2())
- {
- return x86::punpcklwd(x, y);
- }
- else
- {
- int shuffle[4] = {0, 4, 1, 5};
- Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
-
- return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
- }
+ int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16
+ return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
{
- if(CPUID::supportsMMX2())
- {
- return x86::punpckhwd(x, y);
- }
- else
- {
- int shuffle[4] = {2, 6, 3, 7};
- Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
-
- return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
- }
+ int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16
+ auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
+ return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
}
RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
{
- if(CPUID::supportsMMX2())
+ // Real type is v8i16
+ int shuffle[8] =
{
- return x86::pshufw(x, select);
- }
- else
- {
- return RValue<Short4>(createSwizzle4(x.value, select));
- }
+ (select >> 0) & 0x03,
+ (select >> 2) & 0x03,
+ (select >> 4) & 0x03,
+ (select >> 6) & 0x03,
+ (select >> 0) & 0x03,
+ (select >> 2) & 0x03,
+ (select >> 4) & 0x03,
+ (select >> 6) & 0x03,
+ };
+
+ return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
{
- if(CPUID::supportsMMX2())
- {
- return x86::pinsrw(val, Int(element), i);
- }
- else
- {
- return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
- }
+ return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
}
RValue<Short> Extract(RValue<Short4> val, int i)
{
- if(CPUID::supportsMMX2())
- {
- return Short(x86::pextrw(val, i));
- }
- else
- {
- return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
- }
+ return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
}
RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
@@ -2994,14 +2862,7 @@
Type *Short4::getType()
{
- if(CPUID::supportsMMX2())
- {
- return MMX::getType();
- }
- else
- {
- return T(VectorType::get(Short::getType(), 4));
- }
+ return T(Type_v4i16);
}
UShort4::UShort4(RValue<Int4> cast)
@@ -3011,50 +2872,34 @@
UShort4::UShort4(RValue<Float4> cast, bool saturate)
{
- Float4 sat;
-
if(saturate)
{
if(CPUID::supportsSSE4_1())
{
- sat = Min(cast, Float4(0xFFFF)); // packusdw takes care of 0x0000 saturation
+ Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
+ *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
}
else
{
- sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000));
+ *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
}
}
else
{
- sat = cast;
- }
-
- Int4 int4(sat);
-
- if(!saturate || !CPUID::supportsSSE4_1())
- {
- *this = Short4(int4);
- }
- else
- {
- *this = As<Short4>(Int2(As<Int4>(x86::packusdw(int4, int4))));
+ *this = Short4(Int4(cast));
}
}
UShort4::UShort4(unsigned short xyzw)
{
int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
- Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4))));
-
- storeValue(Nucleus::createBitCast(vector, getType()));
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
{
int64_t constantVector[4] = {x, y, z, w};
- Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4))));
-
- storeValue(Nucleus::createBitCast(vector, getType()));
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
UShort4::UShort4(RValue<UShort4> rhs)
@@ -3139,74 +2984,32 @@
RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
- }
+ return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
}
RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
- }
+ return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
}
RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
- }
+ return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
}
RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UShort4>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
- }
+ return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
}
RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UShort4>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
- }
+ return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
}
RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UShort4>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
- }
+ return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
}
RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
@@ -3235,14 +3038,7 @@
RValue<UShort4> operator~(RValue<UShort4> val)
{
- if(CPUID::supportsMMX2())
- {
- return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu));
- }
- else
- {
- return RValue<UShort4>(Nucleus::createNot(val.value));
- }
+ return RValue<UShort4>(Nucleus::createNot(val.value));
}
RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
@@ -3277,19 +3073,14 @@
RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
{
- return x86::packuswb(x, y);
+ auto result = x86::packuswb(x, y);
+
+ return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
}
Type *UShort4::getType()
{
- if(CPUID::supportsMMX2())
- {
- return MMX::getType();
- }
- else
- {
- return T(VectorType::get(UShort::getType(), 4));
- }
+ return T(Type_v4i16);
}
Short8::Short8(short c)
@@ -3317,15 +3108,10 @@
Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
{
- Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
- Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
+ int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16
+ Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
- Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
- long2 = Nucleus::createInsertElement(long2, loLong, 0);
- long2 = Nucleus::createInsertElement(long2, hiLong, 1);
- Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
-
- storeValue(short8);
+ storeValue(packed);
}
RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
@@ -3355,15 +3141,8 @@
RValue<Int4> Abs(RValue<Int4> x)
{
- if(CPUID::supportsSSSE3())
- {
- return x86::pabsd(x);
- }
- else
- {
- Int4 mask = (x >> 31);
- return (mask ^ x) - mask;
- }
+ auto negative = x >> 31;
+ return (x ^ negative) - negative;
}
RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
@@ -3373,7 +3152,7 @@
Type *Short8::getType()
{
- return T(VectorType::get(Short::getType(), 8));
+ return T(llvm::VectorType::get(T(Short::getType()), 8));
}
UShort8::UShort8(unsigned short c)
@@ -3401,15 +3180,10 @@
UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
{
- Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
- Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
+ int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16
+ Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
- Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
- long2 = Nucleus::createInsertElement(long2, loLong, 0);
- long2 = Nucleus::createInsertElement(long2, hiLong, 1);
- Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
-
- storeValue(short8);
+ storeValue(packed);
}
RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
@@ -3506,7 +3280,7 @@
Type *UShort8::getType()
{
- return T(VectorType::get(UShort::getType(), 8));
+ return T(llvm::VectorType::get(T(UShort::getType()), 8));
}
Int::Int(Argument<Int> argument)
@@ -4290,19 +4064,13 @@
Int2::Int2(RValue<Int4> cast)
{
- Value *long2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2)));
- Value *element = Nucleus::createExtractElement(long2, Long::getType(), 0);
- Value *int2 = Nucleus::createBitCast(element, Int2::getType());
-
- storeValue(int2);
+ storeValue(Nucleus::createBitCast(cast.value, getType()));
}
Int2::Int2(int x, int y)
{
int64_t constantVector[2] = {x, y};
- Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Int::getType(), 2))));
-
- storeValue(Nucleus::createBitCast(vector, getType()));
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
Int2::Int2(RValue<Int2> rhs)
@@ -4324,26 +4092,10 @@
Int2::Int2(RValue<Int> lo, RValue<Int> hi)
{
- if(CPUID::supportsMMX2())
- {
- // movd mm0, lo
- // movd mm1, hi
- // punpckldq mm0, mm1
+ int shuffle[4] = {0, 4, 1, 5};
+ Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
- Value *loLong = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), lo.value, 0);
- loLong = Nucleus::createInsertElement(loLong, V(ConstantInt::get(Int::getType(), 0)), 1);
- Value *hiLong = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), hi.value, 0);
- hiLong = Nucleus::createInsertElement(hiLong, V(ConstantInt::get(Int::getType(), 0)), 1);
-
- storeValue(As<Int2>(UnpackLow(As<Int2>(loLong), As<Int2>(hiLong))).value);
- }
- else
- {
- int shuffle[2] = {0, 1};
- Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, T(VectorType::get(Int::getType(), 1))), Nucleus::createBitCast(hi.value, T(VectorType::get(Int::getType(), 1))), shuffle);
-
- storeValue(Nucleus::createBitCast(packed, Int2::getType()));
- }
+ storeValue(Nucleus::createBitCast(packed, Int2::getType()));
}
RValue<Int2> Int2::operator=(RValue<Int2> rhs)
@@ -4371,26 +4123,12 @@
RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::paddd(lhs, rhs);
- }
- else
- {
- return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
- }
+ return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
}
RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return x86::psubd(lhs, rhs);
- }
- else
- {
- return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
- }
+ return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
}
// RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
@@ -4410,38 +4148,17 @@
RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
- }
+ return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
}
RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
- }
+ return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
}
RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
- }
+ return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
}
RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
@@ -4520,90 +4237,41 @@
RValue<Int2> operator~(RValue<Int2> val)
{
- if(CPUID::supportsMMX2())
- {
- return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF);
- }
- else
- {
- return RValue<Int2>(Nucleus::createNot(val.value));
- }
+ return RValue<Int2>(Nucleus::createNot(val.value));
}
RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
{
- if(CPUID::supportsMMX2())
- {
- return x86::punpckldq(x, y);
- }
- else
- {
- int shuffle[2] = {0, 2};
- Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
-
- return As<Short4>(packed);
- }
+ int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32
+ return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
{
- if(CPUID::supportsMMX2())
- {
- return x86::punpckhdq(x, y);
- }
- else
- {
- int shuffle[2] = {1, 3};
- Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
-
- return As<Short4>(packed);
- }
+ int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32
+ auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
+ return As<Short4>(Swizzle(lowHigh, 0xEE));
}
RValue<Int> Extract(RValue<Int2> val, int i)
{
- if(false) // FIXME: LLVM does not generate optimal code
- {
- return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
- }
- else
- {
- if(i == 0)
- {
- return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), Int::getType(), 0));
- }
- else
- {
- Int2 val2 = As<Int2>(UnpackHigh(val, val));
-
- return Extract(val2, 0);
- }
- }
+ return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
}
RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
{
- return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), element.value, i), Int2::getType()));
+ return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
}
Type *Int2::getType()
{
- if(CPUID::supportsMMX2())
- {
- return MMX::getType();
- }
- else
- {
- return T(VectorType::get(Int::getType(), 2));
- }
+ return T(Type_v2i32);
}
UInt2::UInt2(unsigned int x, unsigned int y)
{
int64_t constantVector[2] = {x, y};
- Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UInt::getType(), 2))));
-
- storeValue(Nucleus::createBitCast(vector, getType()));
+ storeValue(Nucleus::createConstantVector(constantVector, getType()));
}
UInt2::UInt2(RValue<UInt2> rhs)
@@ -4648,26 +4316,12 @@
RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
- }
- else
- {
- return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
- }
+ return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
}
RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
- }
- else
- {
- return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
- }
+ return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
}
// RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
@@ -4687,38 +4341,17 @@
RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
- }
+ return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
}
RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
- }
+ return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
}
RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
{
- if(CPUID::supportsMMX2())
- {
- return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
- }
- else
- {
- return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
- }
+ return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
}
RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
@@ -4797,81 +4430,52 @@
RValue<UInt2> operator~(RValue<UInt2> val)
{
- if(CPUID::supportsMMX2())
- {
- return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF);
- }
- else
- {
- return RValue<UInt2>(Nucleus::createNot(val.value));
- }
+ return RValue<UInt2>(Nucleus::createNot(val.value));
}
Type *UInt2::getType()
{
- if(CPUID::supportsMMX2())
- {
- return MMX::getType();
- }
- else
- {
- return T(VectorType::get(UInt::getType(), 2));
- }
+ return T(Type_v2i32);
}
Int4::Int4(RValue<Byte4> cast)
{
- Value *x = Nucleus::createBitCast(cast.value, Int::getType());
- Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
-
- Value *e;
-
- if (CPUID::supportsSSE4_1())
+ if(CPUID::supportsSSE4_1())
{
- e = x86::pmovzxbd(RValue<Int4>(a)).value;
+ *this = x86::pmovzxbd(As<Byte16>(cast));
}
else
{
int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
- Value *b = Nucleus::createBitCast(a, Byte16::getType());
- Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
+ Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
+ Value *b = Nucleus::createShuffleVector(a, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
- Value *d = Nucleus::createBitCast(c, Short8::getType());
- e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
- }
+ Value *c = Nucleus::createBitCast(b, Short8::getType());
+ Value *d = Nucleus::createShuffleVector(c, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
- Value *f = Nucleus::createBitCast(e, Int4::getType());
- storeValue(f);
+ *this = As<Int4>(d);
+ }
}
Int4::Int4(RValue<SByte4> cast)
{
- Value *x = Nucleus::createBitCast(cast.value, Int::getType());
- Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
-
- Value *g;
-
- if (CPUID::supportsSSE4_1())
+ if(CPUID::supportsSSE4_1())
{
- g = x86::pmovsxbd(RValue<Int4>(a)).value;
+ *this = x86::pmovsxbd(As<SByte16>(cast));
}
else
{
- int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
- Value *b = Nucleus::createBitCast(a, Byte16::getType());
- Value *c = Nucleus::createShuffleVector(b, b, swizzle);
+ int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
+ Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
+ Value *b = Nucleus::createShuffleVector(a, a, swizzle);
int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
- Value *d = Nucleus::createBitCast(c, Short8::getType());
- Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
+ Value *c = Nucleus::createBitCast(b, Short8::getType());
+ Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
- Value *f = Nucleus::createBitCast(e, Int4::getType());
- // g = Nucleus::createAShr(f, Nucleus::createConstantInt(24));
- g = x86::psrad(RValue<Int4>(f), 24).value;
+ *this = As<Int4>(d) >> 24;
}
-
- storeValue(g);
}
Int4::Int4(RValue<Float4> cast)
@@ -4883,51 +4487,29 @@
Int4::Int4(RValue<Short4> cast)
{
- Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
- Value *element = Nucleus::createBitCast(cast.value, Long::getType());
- long2 = Nucleus::createInsertElement(long2, element, 0);
- RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
-
if(CPUID::supportsSSE4_1())
{
- storeValue(x86::pmovsxwd(vector).value);
+ *this = x86::pmovsxwd(As<Short8>(cast));
}
else
{
- Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
-
int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
- Value *c = Nucleus::createShuffleVector(b, b, swizzle);
- Value *d = Nucleus::createBitCast(c, Int4::getType());
- storeValue(d);
-
- // Each Short is packed into each Int in the (Short | Short) format.
- // Shifting by 16 will retrieve the original Short value.
- // Shifting an Int will propagate the sign bit, which will work
- // for both positive and negative values of a Short.
- *this >>= 16;
+ Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
+ *this = As<Int4>(c) >> 16;
}
}
Int4::Int4(RValue<UShort4> cast)
{
- Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
- Value *element = Nucleus::createBitCast(cast.value, Long::getType());
- long2 = Nucleus::createInsertElement(long2, element, 0);
- RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
-
if(CPUID::supportsSSE4_1())
{
- storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value);
+ *this = x86::pmovzxwd(As<UShort8>(cast));
}
else
{
- Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
-
int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
- Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Short8::getType())), swizzle);
- Value *d = Nucleus::createBitCast(c, Int4::getType());
- storeValue(d);
+ Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
+ *this = As<Int4>(c);
}
}
@@ -4993,15 +4575,10 @@
Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
{
- Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
- Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
+ int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32
+ Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
- Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
- long2 = Nucleus::createInsertElement(long2, loLong, 0);
- long2 = Nucleus::createInsertElement(long2, hiLong, 1);
- Value *int4 = Nucleus::createBitCast(long2, Int4::getType());
-
- storeValue(int4);
+ storeValue(packed);
}
Int4::Int4(RValue<Int> rhs)
@@ -5270,7 +4847,7 @@
Type *Int4::getType()
{
- return T(VectorType::get(Int::getType(), 4));
+ return T(llvm::VectorType::get(T(Int::getType()), 4));
}
UInt4::UInt4(RValue<Float4> cast)
@@ -5354,15 +4931,10 @@
UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
{
- Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
- Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
+ int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32
+ Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
- Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
- long2 = Nucleus::createInsertElement(long2, loLong, 0);
- long2 = Nucleus::createInsertElement(long2, hiLong, 1);
- Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());
-
- storeValue(uint4);
+ storeValue(packed);
}
RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
@@ -5585,7 +5157,7 @@
Type *UInt4::getType()
{
- return T(VectorType::get(UInt::getType(), 4));
+ return T(llvm::VectorType::get(T(UInt::getType()), 4));
}
Float::Float(RValue<Int> cast)
@@ -5595,6 +5167,14 @@
storeValue(integer);
}
+ Float::Float(RValue<UInt> cast)
+ {
+ RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
+ As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
+
+ storeValue(result.value);
+ }
+
Float::Float(float x)
{
storeValue(Nucleus::createConstantFloat(x));
@@ -5826,74 +5406,26 @@
Float2::Float2(RValue<Float4> cast)
{
- Value *int64x2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2)));
- Value *int64 = Nucleus::createExtractElement(int64x2, Long::getType(), 0);
- Value *float2 = Nucleus::createBitCast(int64, Float2::getType());
-
- storeValue(float2);
+ storeValue(Nucleus::createBitCast(cast.value, getType()));
}
Type *Float2::getType()
{
- return T(VectorType::get(Float::getType(), 2));
+ return T(Type_v2f32);
}
Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
{
- #if 0
- Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType()); // FIXME: Crashes
- #elif 0
- Value *vector = loadValue();
-
- Value *i8x = Nucleus::createExtractElement(cast.value, 0);
- Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
- Value *x = Nucleus::createInsertElement(vector, f32x, 0);
-
- Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
- Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
- Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
-
- Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
- Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
- Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
-
- Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
- Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
- Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
- #else
- Value *a = Int4(cast).loadValue();
- Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
- #endif
+ Value *a = Int4(cast).loadValue();
+ Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
storeValue(xyzw);
}
Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
{
- #if 0
- Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); // FIXME: Crashes
- #elif 0
- Value *vector = loadValue();
-
- Value *i8x = Nucleus::createExtractElement(cast.value, 0);
- Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
- Value *x = Nucleus::createInsertElement(vector, f32x, 0);
-
- Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
- Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
- Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
-
- Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
- Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
- Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
-
- Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
- Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
- Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
- #else
- Value *a = Int4(cast).loadValue();
- Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
- #endif
+ Value *a = Int4(cast).loadValue();
+ Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
storeValue(xyzw);
}
@@ -6102,7 +5634,7 @@
int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
- return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType()));
+ return As<Float4>(result);
}
RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
@@ -6139,9 +5671,9 @@
return x86::sqrtps(x);
}
- RValue<Float4> Insert(RValue<Float4> val, RValue<Float> element, int i)
+ RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
{
- return RValue<Float4>(Nucleus::createInsertElement(val.value, element.value, i));
+ return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
}
RValue<Float> Extract(RValue<Float4> x, int i)
@@ -6182,10 +5714,10 @@
RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
{
Value *vector = lhs.loadValue();
- Value *shuffle = createMask4(vector, rhs.value, select);
- lhs.storeValue(shuffle);
+ Value *result = createMask4(vector, rhs.value, select);
+ lhs.storeValue(result);
- return RValue<Float4>(shuffle);
+ return RValue<Float4>(result);
}
RValue<Int> SignMask(RValue<Float4> x)
@@ -6249,22 +5781,28 @@
}
else
{
- return Float4(Int4(x)); // Rounded toward zero
+ return Float4(Int4(x));
}
}
RValue<Float4> Frac(RValue<Float4> x)
{
+ Float4 frc;
+
if(CPUID::supportsSSE4_1())
{
- return x - x86::floorps(x);
+ frc = x - Floor(x);
}
else
{
- Float4 frc = x - Float4(Int4(x)); // Signed fractional part
+ frc = x - Float4(Int4(x)); // Signed fractional part.
- return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
+ frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
}
+
+ // x - floor(x) can be 1.0 for very small negative x.
+ // Clamp against the value just below 1.0.
+ return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
}
RValue<Float4> Floor(RValue<Float4> x)
@@ -6293,12 +5831,12 @@
Type *Float4::getType()
{
- return T(VectorType::get(Float::getType(), 4));
+ return T(llvm::VectorType::get(T(Float::getType()), 4));
}
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
{
- return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset)), false));
+ return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
}
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
@@ -6378,7 +5916,7 @@
RValue<Long> Ticks()
{
- llvm::Function *rdtsc = Intrinsic::getDeclaration(::module, Intrinsic::readcyclecounter);
+ llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
return RValue<Long>(V(::builder->CreateCall(rdtsc)));
}
@@ -6390,7 +5928,7 @@
{
RValue<Int> cvtss2si(RValue<Float> val)
{
- llvm::Function *cvtss2si = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtss2si);
+ llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
Float4 vector;
vector.x = val;
@@ -6398,104 +5936,80 @@
return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value)));
}
- RValue<Int2> cvtps2pi(RValue<Float4> val)
- {
- llvm::Function *cvtps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtps2pi);
-
- return RValue<Int2>(V(::builder->CreateCall(cvtps2pi, val.value)));
- }
-
- RValue<Int2> cvttps2pi(RValue<Float4> val)
- {
- llvm::Function *cvttps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvttps2pi);
-
- return RValue<Int2>(V(::builder->CreateCall(cvttps2pi, val.value)));
- }
-
RValue<Int4> cvtps2dq(RValue<Float4> val)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *cvtps2dq = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_cvtps2dq);
+ llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
- return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
- }
- else
- {
- Int2 lo = x86::cvtps2pi(val);
- Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
-
- return Int4(lo, hi);
- }
+ return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
}
RValue<Float> rcpss(RValue<Float> val)
{
- llvm::Function *rcpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ss);
+ llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
- Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
+ Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0));
}
RValue<Float> sqrtss(RValue<Float> val)
{
- llvm::Function *sqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ss);
+ llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss);
- Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
+ Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0));
}
RValue<Float> rsqrtss(RValue<Float> val)
{
- llvm::Function *rsqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ss);
+ llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
- Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
+ Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0));
}
RValue<Float4> rcpps(RValue<Float4> val)
{
- llvm::Function *rcpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ps);
+ llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value)));
}
RValue<Float4> sqrtps(RValue<Float4> val)
{
- llvm::Function *sqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ps);
+ llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps);
return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value)));
}
RValue<Float4> rsqrtps(RValue<Float4> val)
{
- llvm::Function *rsqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ps);
+ llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value)));
}
RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
{
- llvm::Function *maxps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_max_ps);
+ llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value)));
}
RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
{
- llvm::Function *minps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_min_ps);
+ llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value)));
}
RValue<Float> roundss(RValue<Float> val, unsigned char imm)
{
- llvm::Function *roundss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ss);
+ llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
- Value *undef = V(UndefValue::get(Float4::getType()));
+ Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), Float::getType(), 0));
@@ -6513,7 +6027,7 @@
RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
{
- llvm::Function *roundps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ps);
+ llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm)))));
}
@@ -6528,397 +6042,151 @@
return roundps(val, 2);
}
- RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
- {
- llvm::Function *cmpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ps);
-
- return RValue<Float4>(V(::builder->CreateCall3(cmpps, x.value, y.value, V(Nucleus::createConstantByte(imm)))));
- }
-
- RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
- {
- return cmpps(x, y, 0);
- }
-
- RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
- {
- return cmpps(x, y, 1);
- }
-
- RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
- {
- return cmpps(x, y, 2);
- }
-
- RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
- {
- return cmpps(x, y, 3);
- }
-
- RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
- {
- return cmpps(x, y, 4);
- }
-
- RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
- {
- return cmpps(x, y, 5);
- }
-
- RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
- {
- return cmpps(x, y, 6);
- }
-
- RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
- {
- return cmpps(x, y, 7);
- }
-
- RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
- {
- llvm::Function *cmpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ss);
-
- Value *vector1 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), x.value, 0);
- Value *vector2 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), y.value, 0);
-
- return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(cmpss, vector1, vector2, V(Nucleus::createConstantByte(imm)))), Float::getType(), 0));
- }
-
- RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
- {
- return cmpss(x, y, 0);
- }
-
- RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
- {
- return cmpss(x, y, 1);
- }
-
- RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
- {
- return cmpss(x, y, 2);
- }
-
- RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
- {
- return cmpss(x, y, 3);
- }
-
- RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
- {
- return cmpss(x, y, 4);
- }
-
- RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
- {
- return cmpss(x, y, 5);
- }
-
- RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
- {
- return cmpss(x, y, 6);
- }
-
- RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
- {
- return cmpss(x, y, 7);
- }
-
RValue<Int4> pabsd(RValue<Int4> x)
{
- llvm::Function *pabsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_ssse3_pabs_d_128);
+ llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
return RValue<Int4>(V(::builder->CreateCall(pabsd, x.value)));
}
RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
{
- llvm::Function *paddsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_w);
+ llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
- return As<Short4>(V(::builder->CreateCall2(paddsw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Short4>(V(::builder->CreateCall2(paddsw, x.value, y.value)));
}
RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
{
- llvm::Function *psubsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_w);
+ llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
- return As<Short4>(V(::builder->CreateCall2(psubsw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Short4>(V(::builder->CreateCall2(psubsw, x.value, y.value)));
}
RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
{
- llvm::Function *paddusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_w);
+ llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
- return As<UShort4>(V(::builder->CreateCall2(paddusw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<UShort4>(V(::builder->CreateCall2(paddusw, x.value, y.value)));
}
RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
{
- llvm::Function *psubusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_w);
+ llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
- return As<UShort4>(V(::builder->CreateCall2(psubusw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<UShort4>(V(::builder->CreateCall2(psubusw, x.value, y.value)));
}
RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
{
- llvm::Function *paddsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_b);
+ llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
- return As<SByte8>(V(::builder->CreateCall2(paddsb, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<SByte8>(V(::builder->CreateCall2(paddsb, x.value, y.value)));
}
RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
{
- llvm::Function *psubsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_b);
+ llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
- return As<SByte8>(V(::builder->CreateCall2(psubsb, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<SByte8>(V(::builder->CreateCall2(psubsb, x.value, y.value)));
}
RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
{
- llvm::Function *paddusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_b);
+ llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
- return As<Byte8>(V(::builder->CreateCall2(paddusb, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Byte8>(V(::builder->CreateCall2(paddusb, x.value, y.value)));
}
RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
{
- llvm::Function *psubusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_b);
+ llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
- return As<Byte8>(V(::builder->CreateCall2(psubusb, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
- {
- llvm::Function *paddw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_w);
-
- return As<Short4>(V(::builder->CreateCall2(paddw, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
- {
- llvm::Function *psubw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_w);
-
- return As<Short4>(V(::builder->CreateCall2(psubw, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
- {
- llvm::Function *pmullw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmull_w);
-
- return As<Short4>(V(::builder->CreateCall2(pmullw, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
- {
- llvm::Function *pand = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pand);
-
- return As<Short4>(V(::builder->CreateCall2(pand, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
- {
- llvm::Function *por = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_por);
-
- return As<Short4>(V(::builder->CreateCall2(por, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
- {
- llvm::Function *pxor = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pxor);
-
- return As<Short4>(V(::builder->CreateCall2(pxor, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
- {
- llvm::Function *pshufw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_pshuf_w);
-
- return As<Short4>(V(::builder->CreateCall2(pshufw, As<MMX>(x).value, V(Nucleus::createConstantByte(y)))));
- }
-
- RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
- {
- llvm::Function *punpcklwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklwd);
-
- return As<Int2>(V(::builder->CreateCall2(punpcklwd, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
- {
- llvm::Function *punpckhwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhwd);
-
- return As<Int2>(V(::builder->CreateCall2(punpckhwd, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
- {
- llvm::Function *pinsrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pinsr_w);
-
- return As<Short4>(V(::builder->CreateCall3(pinsrw, As<MMX>(x).value, y.value, V(Nucleus::createConstantInt(i)))));
- }
-
- RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
- {
- llvm::Function *pextrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pextr_w);
-
- return RValue<Int>(V(::builder->CreateCall2(pextrw, As<MMX>(x).value, V(Nucleus::createConstantInt(i)))));
- }
-
- RValue<Short4> punpckldq(RValue<Int2> x, RValue<Int2> y)
- {
- llvm::Function *punpckldq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckldq);
-
- return As<Short4>(V(::builder->CreateCall2(punpckldq, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> punpckhdq(RValue<Int2> x, RValue<Int2> y)
- {
- llvm::Function *punpckhdq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhdq);
-
- return As<Short4>(V(::builder->CreateCall2(punpckhdq, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
- {
- llvm::Function *punpcklbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklbw);
-
- return As<Short4>(V(::builder->CreateCall2(punpcklbw, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
- {
- llvm::Function *punpckhbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhbw);
-
- return As<Short4>(V(::builder->CreateCall2(punpckhbw, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
- {
- llvm::Function *paddb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_b);
-
- return As<Byte8>(V(::builder->CreateCall2(paddb, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
- {
- llvm::Function *psubb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_b);
-
- return As<Byte8>(V(::builder->CreateCall2(psubb, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
- {
- llvm::Function *paddd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_d);
-
- return As<Int2>(V(::builder->CreateCall2(paddd, As<MMX>(x).value, As<MMX>(y).value)));
- }
-
- RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
- {
- llvm::Function *psubd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_d);
-
- return As<Int2>(V(::builder->CreateCall2(psubd, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Byte8>(V(::builder->CreateCall2(psubusb, x.value, y.value)));
}
RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
{
- llvm::Function *pavgw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pavg_w);
+ llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
- return As<UShort4>(V(::builder->CreateCall2(pavgw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<UShort4>(V(::builder->CreateCall2(pavgw, x.value, y.value)));
}
RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
{
- llvm::Function *pmaxsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmaxs_w);
+ llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
- return As<Short4>(V(::builder->CreateCall2(pmaxsw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Short4>(V(::builder->CreateCall2(pmaxsw, x.value, y.value)));
}
RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
{
- llvm::Function *pminsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmins_w);
+ llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
- return As<Short4>(V(::builder->CreateCall2(pminsw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Short4>(V(::builder->CreateCall2(pminsw, x.value, y.value)));
}
RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
{
- llvm::Function *pcmpgtw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_w);
+ llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
- return As<Short4>(V(::builder->CreateCall2(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Short4>(V(::builder->CreateCall2(pcmpgtw, x.value, y.value)));
}
RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
{
- llvm::Function *pcmpeqw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_w);
+ llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
- return As<Short4>(V(::builder->CreateCall2(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Short4>(V(::builder->CreateCall2(pcmpeqw, x.value, y.value)));
}
RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
{
- llvm::Function *pcmpgtb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_b);
+ llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
- return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, x.value, y.value)));
}
RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
{
- llvm::Function *pcmpeqb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_b);
+ llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
- return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, x.value, y.value)));
}
RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
{
- llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packssdw);
+ llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
- return As<Short4>(V(::builder->CreateCall2(packssdw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Short4>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
}
RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_packssdw_128);
+ llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
- return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
- }
- else
- {
- Int2 loX = Int2(x);
- Int2 hiX = Int2(Swizzle(x, 0xEE));
-
- Int2 loY = Int2(y);
- Int2 hiY = Int2(Swizzle(y, 0xEE));
-
- Short4 lo = x86::packssdw(loX, hiX);
- Short4 hi = x86::packssdw(loY, hiY);
-
- return Short8(lo, hi);
- }
+ return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
}
RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
{
- llvm::Function *packsswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packsswb);
+ llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
- return As<SByte8>(V(::builder->CreateCall2(packsswb, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<SByte8>(V(::builder->CreateCall2(packsswb, x.value, y.value)));
}
RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
{
- llvm::Function *packuswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packuswb);
+ llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
- return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Byte8>(V(::builder->CreateCall2(packuswb, x.value, y.value)));
}
RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
{
if(CPUID::supportsSSE4_1())
{
- llvm::Function *packusdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_packusdw);
+ llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
return RValue<UShort8>(V(::builder->CreateCall2(packusdw, x.value, y.value)));
}
@@ -6933,264 +6201,198 @@
RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
{
- llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_w);
+ llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
- return As<UShort4>(V(::builder->CreateCall2(psrlw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
+ return As<UShort4>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
{
- llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_w);
+ llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
return RValue<UShort8>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
{
- llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_w);
+ llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
- return As<Short4>(V(::builder->CreateCall2(psraw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
+ return As<Short4>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
{
- llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_w);
+ llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
return RValue<Short8>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
{
- llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_w);
+ llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
- return As<Short4>(V(::builder->CreateCall2(psllw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
+ return As<Short4>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
{
- llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_w);
+ llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
return RValue<Short8>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
{
- llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_d);
+ llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
- return As<Int2>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
+ return As<Int2>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_d);
+ llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
- return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
- }
- else
- {
- Int2 lo = Int2(x);
- Int2 hi = Int2(Swizzle(x, 0xEE));
-
- lo = x86::pslld(lo, y);
- hi = x86::pslld(hi, y);
-
- return Int4(lo, hi);
- }
+ return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
{
- llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_d);
+ llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
- return As<Int2>(V(::builder->CreateCall2(psrad, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
+ return As<Int2>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_d);
+ llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
- return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
- }
- else
- {
- Int2 lo = Int2(x);
- Int2 hi = Int2(Swizzle(x, 0xEE));
-
- lo = x86::psrad(lo, y);
- hi = x86::psrad(hi, y);
-
- return Int4(lo, hi);
- }
+ return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
{
- llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_d);
+ llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
- return As<UInt2>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
+ return As<UInt2>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_d);
+ llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
- return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
- }
- else
- {
- UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
- UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
-
- lo = x86::psrld(lo, y);
- hi = x86::psrld(hi, y);
-
- return UInt4(lo, hi);
- }
+ return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
{
- llvm::Function *pmaxsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxsd);
+ llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value)));
}
RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
{
- llvm::Function *pminsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminsd);
+ llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value)));
}
RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
{
- llvm::Function *pmaxud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxud);
+ llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud);
return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value)));
}
RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
{
- llvm::Function *pminud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminud);
+ llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud);
return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value)));
}
RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
{
- llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulh_w);
+ llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
- return As<Short4>(V(::builder->CreateCall2(pmulhw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Short4>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
}
RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
{
- llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulhu_w);
+ llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
- return As<UShort4>(V(::builder->CreateCall2(pmulhuw, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<UShort4>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
}
RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
{
- llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmadd_wd);
+ llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
- return As<Int2>(V(::builder->CreateCall2(pmaddwd, As<MMX>(x).value, As<MMX>(y).value)));
+ return As<Int2>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
}
RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
{
- llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulh_w);
+ llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
}
RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
{
- llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulhu_w);
+ llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
}
RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
{
- llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmadd_wd);
+ llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
}
RValue<Int> movmskps(RValue<Float4> x)
{
- llvm::Function *movmskps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_movmsk_ps);
+ llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
return RValue<Int>(V(::builder->CreateCall(movmskps, x.value)));
}
RValue<Int> pmovmskb(RValue<Byte8> x)
{
- llvm::Function *pmovmskb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmovmskb);
+ llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
- return RValue<Int>(V(::builder->CreateCall(pmovmskb, As<MMX>(x).value)));
+ return RValue<Int>(V(::builder->CreateCall(pmovmskb, x.value))) & 0xFF;
}
- //RValue<Int2> movd(RValue<Pointer<Int>> x)
- //{
- // Value *element = Nucleus::createLoad(x.value);
-
- //// Value *int2 = UndefValue::get(Int2::getType());
- //// int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0));
-
- // Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
-
- // return RValue<Int2>(int2);
- //}
-
- //RValue<Int2> movdq2q(RValue<Int4> x)
- //{
- // Value *long2 = Nucleus::createBitCast(x.value, T(VectorType::get(Long::getType(), 2)));
- // Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0));
-
- // return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
- //}
-
- RValue<Int4> pmovzxbd(RValue<Int4> x)
+ RValue<Int4> pmovzxbd(RValue<Byte16> x)
{
- llvm::Function *pmovzxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxbd);
+ llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
- return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType()))));
+ return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, x.value)));
}
- RValue<Int4> pmovsxbd(RValue<Int4> x)
+ RValue<Int4> pmovsxbd(RValue<SByte16> x)
{
- llvm::Function *pmovsxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxbd);
+ llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
- return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType()))));
+ return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, x.value)));
}
- RValue<Int4> pmovzxwd(RValue<Int4> x)
+ RValue<Int4> pmovzxwd(RValue<UShort8> x)
{
- llvm::Function *pmovzxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxwd);
+ llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
- return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType()))));
+ return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, x.value)));
}
- RValue<Int4> pmovsxwd(RValue<Int4> x)
+ RValue<Int4> pmovsxwd(RValue<Short8> x)
{
- llvm::Function *pmovsxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxwd);
+ llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
- return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType()))));
- }
-
- void emms()
- {
- llvm::Function *emms = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_emms);
-
- V(::builder->CreateCall(emms));
+ return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, x.value)));
}
}
}
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index 831ed40..21e2571 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -15,6 +15,7 @@
#ifndef sw_Nucleus_hpp
#define sw_Nucleus_hpp
+#include <cassert>
#include <cstdarg>
#include <cstdint>
#include <vector>
diff --git a/src/Reactor/Optimizer.cpp b/src/Reactor/Optimizer.cpp
index 38e24ef..2d4ac82 100644
--- a/src/Reactor/Optimizer.cpp
+++ b/src/Reactor/Optimizer.cpp
@@ -17,7 +17,7 @@
#include "src/IceCfg.h"
#include "src/IceCfgNode.h"
-#include <map>
+#include <unordered_map>
#include <vector>
namespace
@@ -38,11 +38,15 @@
void deleteInstruction(Ice::Inst *instruction);
bool isDead(Ice::Inst *instruction);
+ static const Ice::InstIntrinsicCall *asLoadSubVector(const Ice::Inst *instruction);
+ static const Ice::InstIntrinsicCall *asStoreSubVector(const Ice::Inst *instruction);
static bool isLoad(const Ice::Inst &instruction);
static bool isStore(const Ice::Inst &instruction);
static Ice::Operand *storeAddress(const Ice::Inst *instruction);
static Ice::Operand *loadAddress(const Ice::Inst *instruction);
static Ice::Operand *storeData(const Ice::Inst *instruction);
+ static std::size_t storeSize(const Ice::Inst *instruction);
+ static bool loadTypeMatchesStore(const Ice::Inst *load, const Ice::Inst *store);
Ice::Cfg *function;
Ice::GlobalContext *context;
@@ -57,9 +61,9 @@
std::vector<Ice::Inst*> stores;
};
- std::map<Ice::Operand*, Uses> uses;
- std::map<Ice::Inst*, Ice::CfgNode*> node;
- std::map<Ice::Variable*, Ice::Inst*> definition;
+ std::unordered_map<Ice::Operand*, Uses> uses;
+ std::unordered_map<Ice::Inst*, Ice::CfgNode*> node;
+ std::unordered_map<Ice::Variable*, Ice::Inst*> definition;
};
void Optimizer::run(Ice::Cfg *function)
@@ -199,6 +203,11 @@
continue;
}
+ if(!loadTypeMatchesStore(load, store))
+ {
+ continue;
+ }
+
replace(load, storeValue);
for(size_t i = 0; i < addressUses.loads.size(); i++)
@@ -295,6 +304,7 @@
auto &insts = singleBasicBlock->getInsts();
Ice::Inst *store = nullptr;
Ice::Operand *storeValue = nullptr;
+ bool unmatchedLoads = false;
for(Ice::Inst &inst : insts)
{
@@ -310,14 +320,20 @@
continue;
}
- // New store found. If we had a previous one, eliminate it.
- if(store)
+ // New store found. If we had a previous one, try to eliminate it.
+ if(store && !unmatchedLoads)
{
- deleteInstruction(store);
+ // If the previous store is wider than the new one, we can't eliminate it
+ // because there could be a wide load reading its non-overwritten data.
+ if(storeSize(&inst) >= storeSize(store))
+ {
+ deleteInstruction(store);
+ }
}
store = &inst;
storeValue = storeData(store);
+ unmatchedLoads = false;
}
else if(isLoad(inst))
{
@@ -328,10 +344,13 @@
continue;
}
- if(storeValue)
+ if(!loadTypeMatchesStore(load, store))
{
- replace(load, storeValue);
+ unmatchedLoads = true;
+ continue;
}
+
+ replace(load, storeValue);
}
}
}
@@ -464,6 +483,32 @@
return false;
}
+ const Ice::InstIntrinsicCall *Optimizer::asLoadSubVector(const Ice::Inst *instruction)
+ {
+ // Returns the instruction as an intrinsic call if it is a LoadSubVector, nullptr otherwise.
+ auto *intrinsicCall = llvm::dyn_cast<Ice::InstIntrinsicCall>(instruction);
+ if(!intrinsicCall)
+ {
+ return nullptr;
+ }
+
+ const bool isLoadSubVector = intrinsicCall->getIntrinsicInfo().ID == Ice::Intrinsics::LoadSubVector;
+ return isLoadSubVector ? intrinsicCall : nullptr;
+ }
+
+ const Ice::InstIntrinsicCall *Optimizer::asStoreSubVector(const Ice::Inst *instruction)
+ {
+ // Returns the instruction as an intrinsic call if it is a StoreSubVector, nullptr otherwise.
+ auto *intrinsicCall = llvm::dyn_cast<Ice::InstIntrinsicCall>(instruction);
+ if(!intrinsicCall)
+ {
+ return nullptr;
+ }
+
+ const bool isStoreSubVector = intrinsicCall->getIntrinsicInfo().ID == Ice::Intrinsics::StoreSubVector;
+ return isStoreSubVector ? intrinsicCall : nullptr;
+ }
+
bool Optimizer::isLoad(const Ice::Inst &instruction)
{
if(llvm::isa<Ice::InstLoad>(&instruction))
@@ -471,12 +516,7 @@
return true;
}
- if(auto intrinsicCall = llvm::dyn_cast<Ice::InstIntrinsicCall>(&instruction))
- {
- return intrinsicCall->getIntrinsicInfo().ID == Ice::Intrinsics::LoadSubVector;
- }
-
- return false;
+ return asLoadSubVector(&instruction) != nullptr;
}
bool Optimizer::isStore(const Ice::Inst &instruction)
@@ -486,12 +526,7 @@
return true;
}
- if(auto intrinsicCall = llvm::dyn_cast<Ice::InstIntrinsicCall>(&instruction))
- {
- return intrinsicCall->getIntrinsicInfo().ID == Ice::Intrinsics::StoreSubVector;
- }
-
- return false;
+ return asStoreSubVector(&instruction) != nullptr;
}
Ice::Operand *Optimizer::storeAddress(const Ice::Inst *instruction)
@@ -503,12 +538,9 @@
return store->getAddr();
}
- if(auto *instrinsic = llvm::dyn_cast<Ice::InstIntrinsicCall>(instruction))
+ if(auto *storeSubVector = asStoreSubVector(instruction))
{
- if(instrinsic->getIntrinsicInfo().ID == Ice::Intrinsics::StoreSubVector)
- {
- return instrinsic->getSrc(2);
- }
+ return storeSubVector->getSrc(2);
}
return nullptr;
@@ -523,12 +555,9 @@
return load->getSourceAddress();
}
- if(auto *instrinsic = llvm::dyn_cast<Ice::InstIntrinsicCall>(instruction))
+ if(auto *loadSubVector = asLoadSubVector(instruction))
{
- if(instrinsic->getIntrinsicInfo().ID == Ice::Intrinsics::LoadSubVector)
- {
- return instrinsic->getSrc(1);
- }
+ return loadSubVector->getSrc(1);
}
return nullptr;
@@ -543,17 +572,63 @@
return store->getData();
}
- if(auto *instrinsic = llvm::dyn_cast<Ice::InstIntrinsicCall>(instruction))
+ if(auto *storeSubVector = asStoreSubVector(instruction))
{
- if(instrinsic->getIntrinsicInfo().ID == Ice::Intrinsics::StoreSubVector)
- {
- return instrinsic->getSrc(1);
- }
+ return storeSubVector->getSrc(1);
}
return nullptr;
}
+ std::size_t Optimizer::storeSize(const Ice::Inst *store)  // Width in bytes written by a store instruction.
+ {
+ assert(isStore(*store));  // Caller must pass a regular store or a StoreSubVector intrinsic.
+
+ if(auto *instStore = llvm::dyn_cast<Ice::InstStore>(store))
+ {
+ return Ice::typeWidthInBytes(instStore->getData()->getType());  // Regular store: width of the stored data's type.
+ }
+
+ if(auto *storeSubVector = asStoreSubVector(store))
+ {
+ return llvm::cast<Ice::ConstantInteger32>(storeSubVector->getSrc(3))->getValue();  // Source 3 is the constant size argument (sources 1 and 2 are data and address).
+ }
+
+ return 0;  // Neither kind of store matched; only reachable when the assert above is compiled out.
+ }
+
+ bool Optimizer::loadTypeMatchesStore(const Ice::Inst *load, const Ice::Inst *store)  // True if the load reads exactly the type (and sub-vector width) the store wrote.
+ {
+ if(!load || !store)  // A missing instruction never matches.
+ {
+ return false;
+ }
+
+ assert(isLoad(*load) && isStore(*store));
+ assert(loadAddress(load) == storeAddress(store));  // Only meaningful for a load/store pair on the same address operand.
+
+ if(auto *instStore = llvm::dyn_cast<Ice::InstStore>(store))
+ {
+ if(auto *instLoad = llvm::dyn_cast<Ice::InstLoad>(load))
+ {
+ return instStore->getData()->getType() == instLoad->getDest()->getType();  // Regular pair: stored data type must equal loaded type.
+ }
+ }
+
+ if(auto *storeSubVector = asStoreSubVector(store))
+ {
+ if(auto *loadSubVector = asLoadSubVector(load))
+ {
+ // Check for matching type and sub-vector width.
+ return storeSubVector->getSrc(1)->getType() == loadSubVector->getDest()->getType() &&
+ llvm::cast<Ice::ConstantInteger32>(storeSubVector->getSrc(3))->getValue() ==
+ llvm::cast<Ice::ConstantInteger32>(loadSubVector->getSrc(2))->getValue();
+ }
+ }
+
+ return false;  // Mixed pairs (regular store with sub-vector load, or vice versa) never match.
+ }
+
bool Optimizer::Uses::areOnlyLoadStore() const
{
return size() == (loads.size() + stores.size());
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index b02c6be..46973da 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -18,6 +18,7 @@
#include "Nucleus.hpp"
#include "Routine.hpp"
+#include <cassert>
#include <cstddef>
#include <cwchar>
#undef Bool
@@ -88,8 +89,8 @@
return false;
}
- Value *loadValue(unsigned int alignment = 0) const;
- Value *storeValue(Value *value, unsigned int alignment = 0) const;
+ Value *loadValue() const;
+ Value *storeValue(Value *value) const;
Value *getAddress(Value *index, bool unsignedIndex) const;
};
@@ -1535,6 +1536,7 @@
{
public:
explicit Float(RValue<Int> cast);
+ explicit Float(RValue<UInt> cast);
Float() = default;
Float(float x);
@@ -2103,7 +2105,7 @@
template<class S>
Pointer(const Pointer<S> &pointer, int alignment = 1) : alignment(alignment)
{
- Value *pointerS = pointer.loadValue(alignment);
+ Value *pointerS = pointer.loadValue();
Value *pointerT = Nucleus::createBitCast(pointerS, Nucleus::getPointerType(T::getType()));
LValue<Pointer<T>>::storeValue(pointerT);
}
@@ -2238,15 +2240,15 @@
}
template<class T>
- Value *LValue<T>::loadValue(unsigned int alignment) const
+ Value *LValue<T>::loadValue() const
{
- return Nucleus::createLoad(address, T::getType(), false, alignment);
+ return Nucleus::createLoad(address, T::getType(), false, 0);
}
template<class T>
- Value *LValue<T>::storeValue(Value *value, unsigned int alignment) const
+ Value *LValue<T>::storeValue(Value *value) const
{
- return Nucleus::createStore(value, address, T::getType(), false, alignment);
+ return Nucleus::createStore(value, address, T::getType(), false, 0);
}
template<class T>
@@ -2305,6 +2307,8 @@
template<class T>
RValue<T>::RValue(Value *rvalue)
{
+ assert(Nucleus::createBitCast(rvalue, T::getType()) == rvalue); // Run-time type should match T, so bitcast is no-op.
+
value = rvalue;
}
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index fc70ac2..7e607d9 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -123,6 +123,7 @@
const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
const bool emulateIntrinsics = CPUID::ARM;
+ const bool emulateMismatchedBitCast = CPUID::ARM;
}
namespace sw
@@ -288,7 +289,6 @@
}
}
-
return symbolValue;
}
@@ -848,12 +848,43 @@
if(valueType & EmulatedBits)
{
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- load->addArg(ptr);
- load->addArg(::context->getConstantInt32(typeSize(type)));
- ::basicBlock->appendInst(load);
+ if(emulateIntrinsics)
+ {
+ if(typeSize(type) == 4)
+ {
+ auto pointer = RValue<Pointer<Byte>>(ptr);
+ Int x = *Pointer<Int>(pointer);
+
+ Int4 vector;
+ vector = Insert(vector, x, 0);
+
+ auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
+ ::basicBlock->appendInst(bitcast);
+ }
+ else if(typeSize(type) == 8)
+ {
+ auto pointer = RValue<Pointer<Byte>>(ptr);
+ Int x = *Pointer<Int>(pointer);
+ Int y = *Pointer<Int>(pointer + 4);
+
+ Int4 vector;
+ vector = Insert(vector, x, 0);
+ vector = Insert(vector, y, 1);
+
+ auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
+ ::basicBlock->appendInst(bitcast);
+ }
+ else assert(false);
+ }
+ else
+ {
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ load->addArg(ptr);
+ load->addArg(::context->getConstantInt32(typeSize(type)));
+ ::basicBlock->appendInst(load);
+ }
}
else
{
@@ -870,13 +901,46 @@
if(valueType & EmulatedBits)
{
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
- store->addArg(value);
- store->addArg(ptr);
- store->addArg(::context->getConstantInt32(typeSize(type)));
- ::basicBlock->appendInst(store);
+ if(emulateIntrinsics)
+ {
+ if(typeSize(type) == 4)
+ {
+ Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
+ auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
+ ::basicBlock->appendInst(bitcast);
+
+ RValue<Int4> v(V(vector));
+
+ auto pointer = RValue<Pointer<Byte>>(ptr);
+ Int x = Extract(v, 0);
+ *Pointer<Int>(pointer) = x;
+ }
+ else if(typeSize(type) == 8)
+ {
+ Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
+ auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
+ ::basicBlock->appendInst(bitcast);
+
+ RValue<Int4> v(V(vector));
+
+ auto pointer = RValue<Pointer<Byte>>(ptr);
+ Int x = Extract(v, 0);
+ *Pointer<Int>(pointer) = x;
+ Int y = Extract(v, 1);
+ *Pointer<Int>(pointer + 4) = y;
+ }
+ else assert(false);
+ }
+ else
+ {
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
+ store->addArg(value);
+ store->addArg(ptr);
+ store->addArg(::context->getConstantInt32(typeSize(type)));
+ ::basicBlock->appendInst(store);
+ }
}
else
{
@@ -981,6 +1045,25 @@
Value *Nucleus::createBitCast(Value *v, Type *destType)
{
+ // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
+ // support for casting between scalars and wide vectors. For platforms where this is not supported,
+ // emulate them by writing to the stack and reading back as the destination type.
+ if(emulateMismatchedBitCast)
+ {
+ if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
+ {
+ Value *address = allocateStackVariable(destType);
+ createStore(v, address, T(v->getType()));
+ return createLoad(address, destType);
+ }
+ else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
+ {
+ Value *address = allocateStackVariable(T(v->getType()));
+ createStore(v, address, T(v->getType()));
+ return createLoad(address, destType);
+ }
+ }
+
return createCast(Ice::InstCast::Bitcast, v, destType);
}
@@ -2626,36 +2709,85 @@
return RValue<Byte8>(Nucleus::createNot(val.value));
}
+ RValue<Byte> Extract(RValue<Byte8> val, int i)
+ {
+ return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
+ }
+
+ RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
+ {
+ return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
+ RValue<Byte> Saturate(RValue<UShort> x)  // Narrows to Byte, clamping values above 0xFF to 0xFF.
+ {
+ return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), Int(x)));  // Upper clamp only; there is no lower-bound check.
+ }
+
RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- paddusb->addArg(x.value);
- paddusb->addArg(y.value);
- ::basicBlock->appendInst(paddusb);
+ if(emulateIntrinsics)
+ {
+ Byte8 result;
+ result = Insert(result, Saturate(UShort(Int(Extract(x, 0))) + UShort(Int(Extract(y, 0)))), 0);
+ result = Insert(result, Saturate(UShort(Int(Extract(x, 1))) + UShort(Int(Extract(y, 1)))), 1);
+ result = Insert(result, Saturate(UShort(Int(Extract(x, 2))) + UShort(Int(Extract(y, 2)))), 2);
+ result = Insert(result, Saturate(UShort(Int(Extract(x, 3))) + UShort(Int(Extract(y, 3)))), 3);
+ result = Insert(result, Saturate(UShort(Int(Extract(x, 4))) + UShort(Int(Extract(y, 4)))), 4);
+ result = Insert(result, Saturate(UShort(Int(Extract(x, 5))) + UShort(Int(Extract(y, 5)))), 5);
+ result = Insert(result, Saturate(UShort(Int(Extract(x, 6))) + UShort(Int(Extract(y, 6)))), 6);
+ result = Insert(result, Saturate(UShort(Int(Extract(x, 7))) + UShort(Int(Extract(y, 7)))), 7);
- return RValue<Byte8>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ paddusb->addArg(x.value);
+ paddusb->addArg(y.value);
+ ::basicBlock->appendInst(paddusb);
+
+ return RValue<Byte8>(V(result));
+ }
}
RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- psubusw->addArg(x.value);
- psubusw->addArg(y.value);
- ::basicBlock->appendInst(psubusw);
+ if(emulateIntrinsics)  // Per-lane scalar fallback for unsigned saturating subtraction.
+ {
+ Byte8 result;  // Each lane is max(x - y, 0): a UShort difference would wrap to 0xFFxx and be clamped to 0xFF instead of 0.
+ result = Insert(result, Byte(IfThenElse(Int(Extract(x, 0)) < Int(Extract(y, 0)), Int(0), Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
+ result = Insert(result, Byte(IfThenElse(Int(Extract(x, 1)) < Int(Extract(y, 1)), Int(0), Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
+ result = Insert(result, Byte(IfThenElse(Int(Extract(x, 2)) < Int(Extract(y, 2)), Int(0), Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
+ result = Insert(result, Byte(IfThenElse(Int(Extract(x, 3)) < Int(Extract(y, 3)), Int(0), Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
+ result = Insert(result, Byte(IfThenElse(Int(Extract(x, 4)) < Int(Extract(y, 4)), Int(0), Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
+ result = Insert(result, Byte(IfThenElse(Int(Extract(x, 5)) < Int(Extract(y, 5)), Int(0), Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
+ result = Insert(result, Byte(IfThenElse(Int(Extract(x, 6)) < Int(Extract(y, 6)), Int(0), Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
+ result = Insert(result, Byte(IfThenElse(Int(Extract(x, 7)) < Int(Extract(y, 7)), Int(0), Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
- return RValue<Byte8>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ psubusw->addArg(x.value);
+ psubusw->addArg(y.value);
+ ::basicBlock->appendInst(psubusw);
+
+ return RValue<Byte8>(V(result));
+ }
}
RValue<Short4> Unpack(RValue<Byte4> x)
{
int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; // Real type is v16i8
- return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
+ return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
}
RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
@@ -2666,7 +2798,7 @@
RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
{
int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
- return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
+ return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
@@ -2676,16 +2808,64 @@
return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
}
+ RValue<SByte> Extract(RValue<SByte8> val, int i)
+ {
+ return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
+ }
+
+ RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
+ {
+ return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
+ RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
+ {
+ if(emulateIntrinsics)
+ {
+ SByte8 result;
+ result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ #if defined(__i386__) || defined(__x86_64__)
+ // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
+ RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00);
+ RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
+
+ return As<SByte8>(hi | lo);
+ #else
+ return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ #endif
+ }
+ }
+
RValue<Int> SignMask(RValue<Byte8> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
- movmsk->addArg(x.value);
- ::basicBlock->appendInst(movmsk);
+ if(emulateIntrinsics)
+ {
+ Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
+ return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ movmsk->addArg(x.value);
+ ::basicBlock->appendInst(movmsk);
- return RValue<Int>(V(result));
+ return RValue<Int>(V(result)) & 0xFF;
+ }
}
// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
@@ -2866,36 +3046,75 @@
return RValue<SByte8>(Nucleus::createNot(val.value));
}
+ RValue<SByte> Saturate(RValue<Short> x)  // Narrows to SByte, clamping to [-0x80, 0x7F].
+ {
+ return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(-0x80), Int(x))));  // Lower bound spelled as -0x80 so it does not depend on narrowing wraparound of +0x80.
+ }
+
RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- paddsb->addArg(x.value);
- paddsb->addArg(y.value);
- ::basicBlock->appendInst(paddsb);
+ if(emulateIntrinsics)
+ {
+ SByte8 result;
+ result = Insert(result, Saturate(Short(Int(Extract(x, 0))) + Short(Int(Extract(y, 0)))), 0);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 1))) + Short(Int(Extract(y, 1)))), 1);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 2))) + Short(Int(Extract(y, 2)))), 2);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 3))) + Short(Int(Extract(y, 3)))), 3);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 4))) + Short(Int(Extract(y, 4)))), 4);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 5))) + Short(Int(Extract(y, 5)))), 5);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 6))) + Short(Int(Extract(y, 6)))), 6);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 7))) + Short(Int(Extract(y, 7)))), 7);
- return RValue<SByte8>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ paddsb->addArg(x.value);
+ paddsb->addArg(y.value);
+ ::basicBlock->appendInst(paddsb);
+
+ return RValue<SByte8>(V(result));
+ }
}
RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- psubsb->addArg(x.value);
- psubsb->addArg(y.value);
- ::basicBlock->appendInst(psubsb);
+ if(emulateIntrinsics)
+ {
+ SByte8 result;
+ result = Insert(result, Saturate(Short(Int(Extract(x, 0))) - Short(Int(Extract(y, 0)))), 0);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 1))) - Short(Int(Extract(y, 1)))), 1);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 2))) - Short(Int(Extract(y, 2)))), 2);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 3))) - Short(Int(Extract(y, 3)))), 3);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 4))) - Short(Int(Extract(y, 4)))), 4);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 5))) - Short(Int(Extract(y, 5)))), 5);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 6))) - Short(Int(Extract(y, 6)))), 6);
+ result = Insert(result, Saturate(Short(Int(Extract(x, 7))) - Short(Int(Extract(y, 7)))), 7);
- return RValue<SByte8>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ psubsb->addArg(x.value);
+ psubsb->addArg(y.value);
+ ::basicBlock->appendInst(psubsb);
+
+ return RValue<SByte8>(V(result));
+ }
}
RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
{
int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
- return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
+ return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
@@ -2907,14 +3126,22 @@
RValue<Int> SignMask(RValue<SByte8> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
- movmsk->addArg(x.value);
- ::basicBlock->appendInst(movmsk);
+ if(emulateIntrinsics)
+ {
+ SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
+ return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ movmsk->addArg(x.value);
+ ::basicBlock->appendInst(movmsk);
- return RValue<Int>(V(result));
+ return RValue<Int>(V(result)) & 0xFF;
+ }
}
RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
@@ -3018,7 +3245,7 @@
Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
Value *packed = Nucleus::createShuffleVector(short8, short8, select);
- Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
+ Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
storeValue(short4);
@@ -3165,12 +3392,38 @@
RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
{
- return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Short4 result;
+ result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
{
- return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Short4 result;
+ result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
@@ -3270,75 +3523,147 @@
return RValue<Short4>(V(result));
}
+ RValue<Short> Saturate(RValue<Int> x)  // Narrows to Short, clamping to [-0x8000, 0x7FFF].
+ {
+ return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(-0x8000), x)));  // Lower bound spelled as -0x8000 so it does not depend on narrowing wraparound of +0x8000.
+ }
+
RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- paddsw->addArg(x.value);
- paddsw->addArg(y.value);
- ::basicBlock->appendInst(paddsw);
+ if(emulateIntrinsics)
+ {
+ Short4 result;
+ result = Insert(result, Saturate(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
+ result = Insert(result, Saturate(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
+ result = Insert(result, Saturate(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
+ result = Insert(result, Saturate(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
- return RValue<Short4>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ paddsw->addArg(x.value);
+ paddsw->addArg(y.value);
+ ::basicBlock->appendInst(paddsw);
+
+ return RValue<Short4>(V(result));
+ }
}
RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- psubsw->addArg(x.value);
- psubsw->addArg(y.value);
- ::basicBlock->appendInst(psubsw);
+ if(emulateIntrinsics)
+ {
+ Short4 result;
+ result = Insert(result, Saturate(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
+ result = Insert(result, Saturate(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
+ result = Insert(result, Saturate(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
+ result = Insert(result, Saturate(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
- return RValue<Short4>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ psubsw->addArg(x.value);
+ psubsw->addArg(y.value);
+ ::basicBlock->appendInst(psubsw);
+
+ return RValue<Short4>(V(result));
+ }
}
RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- pmulhw->addArg(x.value);
- pmulhw->addArg(y.value);
- ::basicBlock->appendInst(pmulhw);
+ if(emulateIntrinsics)
+ {
+ Short4 result;
+ result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
+ result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
+ result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
+ result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
- return RValue<Short4>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ pmulhw->addArg(x.value);
+ pmulhw->addArg(y.value);
+ ::basicBlock->appendInst(pmulhw);
+
+ return RValue<Short4>(V(result));
+ }
}
RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- pmaddwd->addArg(x.value);
- pmaddwd->addArg(y.value);
- ::basicBlock->appendInst(pmaddwd);
+ if(emulateIntrinsics)
+ {
+ Int2 result;
+ result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
+ result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
- return RValue<Int2>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ pmaddwd->addArg(x.value);
+ pmaddwd->addArg(y.value);
+ ::basicBlock->appendInst(pmaddwd);
+
+ return As<Int2>(V(result));
+ }
}
RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- pack->addArg(x.value);
- pack->addArg(y.value);
- ::basicBlock->appendInst(pack);
+ if(emulateIntrinsics)
+ {
+ SByte8 result;
+ result = Insert(result, Saturate(Extract(x, 0)), 0);
+ result = Insert(result, Saturate(Extract(x, 1)), 1);
+ result = Insert(result, Saturate(Extract(x, 2)), 2);
+ result = Insert(result, Saturate(Extract(x, 3)), 3);
+ result = Insert(result, Saturate(Extract(y, 0)), 4);
+ result = Insert(result, Saturate(Extract(y, 1)), 5);
+ result = Insert(result, Saturate(Extract(y, 2)), 6);
+ result = Insert(result, Saturate(Extract(y, 3)), 7);
- return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ pack->addArg(x.value);
+ pack->addArg(y.value);
+ ::basicBlock->appendInst(pack);
+
+ return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
+ }
}
RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
{
int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16
- return RValue<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
+ return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}
RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
@@ -3538,14 +3863,50 @@
return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
}
+ RValue<UShort> Extract(RValue<UShort4> val, int i)
+ {
+ return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
+ }
+
+ RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
+ {
+ return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
{
- return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UShort4 result;
+ result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
{
- return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UShort4 result;
+ result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
@@ -3589,43 +3950,87 @@
return RValue<UShort4>(V(result));
}
+ RValue<UShort> SaturateUShort(RValue<Int> x)
+ {
+ return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
+ }
+
RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- paddusw->addArg(x.value);
- paddusw->addArg(y.value);
- ::basicBlock->appendInst(paddusw);
+ if(emulateIntrinsics)
+ {
+ UShort4 result;
+ result = Insert(result, SaturateUShort(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
+ result = Insert(result, SaturateUShort(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
+ result = Insert(result, SaturateUShort(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
+ result = Insert(result, SaturateUShort(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
- return RValue<UShort4>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ paddusw->addArg(x.value);
+ paddusw->addArg(y.value);
+ ::basicBlock->appendInst(paddusw);
+
+ return RValue<UShort4>(V(result));
+ }
}
RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- psubusw->addArg(x.value);
- psubusw->addArg(y.value);
- ::basicBlock->appendInst(psubusw);
+ if(emulateIntrinsics)
+ {
+ UShort4 result;
+ result = Insert(result, SaturateUShort(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
+ result = Insert(result, SaturateUShort(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
+ result = Insert(result, SaturateUShort(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
+ result = Insert(result, SaturateUShort(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
- return RValue<UShort4>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ psubusw->addArg(x.value);
+ psubusw->addArg(y.value);
+ ::basicBlock->appendInst(psubusw);
+
+ return RValue<UShort4>(V(result));
+ }
}
RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- pmulhuw->addArg(x.value);
- pmulhuw->addArg(y.value);
- ::basicBlock->appendInst(pmulhuw);
+ if(emulateIntrinsics)
+ {
+ UShort4 result;
+ result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
+ result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
+ result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
+ result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
- return RValue<UShort4>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ pmulhuw->addArg(x.value);
+ pmulhuw->addArg(y.value);
+ ::basicBlock->appendInst(pmulhuw);
+
+ return RValue<UShort4>(V(result));
+ }
}
RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
@@ -3635,15 +4040,32 @@
RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- pack->addArg(x.value);
- pack->addArg(y.value);
- ::basicBlock->appendInst(pack);
+ if(emulateIntrinsics)
+ {
+ Byte8 result;
+ result = Insert(result, Saturate(Extract(x, 0)), 0);
+ result = Insert(result, Saturate(Extract(x, 1)), 1);
+ result = Insert(result, Saturate(Extract(x, 2)), 2);
+ result = Insert(result, Saturate(Extract(x, 3)), 3);
+ result = Insert(result, Saturate(Extract(y, 0)), 4);
+ result = Insert(result, Saturate(Extract(y, 1)), 5);
+ result = Insert(result, Saturate(Extract(y, 2)), 6);
+ result = Insert(result, Saturate(Extract(y, 3)), 7);
- return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ pack->addArg(x.value);
+ pack->addArg(y.value);
+ ::basicBlock->appendInst(pack);
+
+ return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
+ }
}
Type *UShort4::getType()
@@ -3692,14 +4114,58 @@
return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
}
+ RValue<Short> Extract(RValue<Short8> val, int i)
+ {
+ return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
+ }
+
+ RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
+ {
+ return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
{
- return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Short8 result;
+ result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
{
- return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Short8 result;
+ result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
@@ -3782,14 +4248,58 @@
return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
}
+ RValue<UShort> Extract(RValue<UShort8> val, int i)
+ {
+ return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
+ }
+
+ RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
+ {
+ return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
{
- return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UShort8 result;
+ result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
{
- return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UShort8 result;
+ result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
@@ -4704,12 +5214,34 @@
RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
{
- return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Int2 result;
+ result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
{
- return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Int2 result;
+ result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
@@ -4891,14 +5423,46 @@
return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
}
+ RValue<UInt> Extract(RValue<UInt2> val, int i)
+ {
+ return RValue<UInt>(Nucleus::createExtractElement(val.value, UInt::getType(), i));
+ }
+
+ RValue<UInt2> Insert(RValue<UInt2> val, RValue<UInt> element, int i)
+ {
+ return RValue<UInt2>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
{
- return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UInt2 result;
+ result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
{
- return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UInt2 result;
+ result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
@@ -4994,18 +5558,15 @@
Value *x = Nucleus::createBitCast(cast.value, Int::getType());
Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
- Value *e;
int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
Value *b = Nucleus::createBitCast(a, Byte16::getType());
Value *c = Nucleus::createShuffleVector(b, b, swizzle);
int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Value *d = Nucleus::createBitCast(c, Short8::getType());
- e = Nucleus::createShuffleVector(d, d, swizzle2);
+ Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
- Value *f = Nucleus::createBitCast(e, Int4::getType());
- Value *g = Nucleus::createAShr(f, V(::context->getConstantInt32(24)));
- storeValue(g);
+ *this = As<Int4>(e) >> 24;
}
Int4::Int4(RValue<Float4> cast)
@@ -5019,9 +5580,8 @@
{
int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
- Value *d = Nucleus::createBitCast(c, Int4::getType());
- Value *e = Nucleus::createAShr(d, V(::context->getConstantInt32(16)));
- storeValue(e);
+
+ *this = As<Int4>(c) >> 16;
}
Int4::Int4(RValue<UShort4> cast)
@@ -5185,12 +5745,38 @@
RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
{
- return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Int4 result;
+ result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
{
- return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Int4 result;
+ result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
@@ -5346,15 +5932,32 @@
RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
- pack->addArg(x.value);
- pack->addArg(y.value);
- ::basicBlock->appendInst(pack);
+ if(emulateIntrinsics)
+ {
+ Short8 result;
+ result = Insert(result, Saturate(Extract(x, 0)), 0);
+ result = Insert(result, Saturate(Extract(x, 1)), 1);
+ result = Insert(result, Saturate(Extract(x, 2)), 2);
+ result = Insert(result, Saturate(Extract(x, 3)), 3);
+ result = Insert(result, Saturate(Extract(y, 0)), 4);
+ result = Insert(result, Saturate(Extract(y, 1)), 5);
+ result = Insert(result, Saturate(Extract(y, 2)), 6);
+ result = Insert(result, Saturate(Extract(y, 3)), 7);
- return RValue<Short8>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ pack->addArg(x.value);
+ pack->addArg(y.value);
+ ::basicBlock->appendInst(pack);
+
+ return RValue<Short8>(V(result));
+ }
}
RValue<Int> Extract(RValue<Int4> x, int i)
@@ -5369,14 +5972,22 @@
RValue<Int> SignMask(RValue<Int4> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
- movmsk->addArg(x.value);
- ::basicBlock->appendInst(movmsk);
+ if(emulateIntrinsics)
+ {
+ Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
+ return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ movmsk->addArg(x.value);
+ ::basicBlock->appendInst(movmsk);
- return RValue<Int>(V(result));
+ return RValue<Int>(V(result));
+ }
}
RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
@@ -5536,14 +6147,50 @@
return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
}
+ RValue<UInt> Extract(RValue<UInt4> x, int i)
+ {
+ return RValue<UInt>(Nucleus::createExtractElement(x.value, UInt::getType(), i));
+ }
+
+ RValue<UInt4> Insert(RValue<UInt4> x, RValue<UInt> element, int i)
+ {
+ return RValue<UInt4>(Nucleus::createInsertElement(x.value, element.value, i));
+ }
+
RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
{
- return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UInt4 result;
+ result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
{
- return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UInt4 result;
+ result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
@@ -5715,6 +6362,14 @@
storeValue(integer);
}
+ Float::Float(RValue<UInt> cast)
+ {
+ RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
+ As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
+
+ storeValue(result.value);
+ }
+
Float::Float(float x)
{
storeValue(Nucleus::createConstantFloat(x));
@@ -6177,14 +6832,27 @@
RValue<Float4> Sqrt(RValue<Float4> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
- sqrt->addArg(x.value);
- ::basicBlock->appendInst(sqrt);
+ if(emulateIntrinsics)
+ {
+ Float4 result;
+ result.x = Sqrt(Float(Float4(x).x));
+ result.y = Sqrt(Float(Float4(x).y));
+ result.z = Sqrt(Float(Float4(x).z));
+ result.w = Sqrt(Float(Float4(x).w));
- return RValue<Float4>(V(result));
+ return result;
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ sqrt->addArg(x.value);
+ ::basicBlock->appendInst(sqrt);
+
+ return RValue<Float4>(V(result));
+ }
}
RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
@@ -6238,14 +6906,22 @@
RValue<Int> SignMask(RValue<Float4> x)
{
- Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
- const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
- auto target = ::context->getConstantUndef(Ice::IceType_i32);
- auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
- movmsk->addArg(x.value);
- ::basicBlock->appendInst(movmsk);
+ if(emulateIntrinsics)
+ {
+ Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
+ return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
+ }
+ else
+ {
+ Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
+ movmsk->addArg(x.value);
+ ::basicBlock->appendInst(movmsk);
- return RValue<Int>(V(result));
+ return RValue<Int>(V(result));
+ }
}
RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
@@ -6325,16 +7001,22 @@
RValue<Float4> Frac(RValue<Float4> x)
{
+ Float4 frc;
+
if(CPUID::SSE4_1)
{
- return x - Floor(x);
+ frc = x - Floor(x);
}
else
{
- Float4 frc = x - Float4(Int4(x)); // Signed fractional part
+ frc = x - Float4(Int4(x)); // Signed fractional part.
- return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
+ frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
}
+
+ // x - floor(x) can be 1.0 for very small negative x.
+ // Clamp against the value just below 1.0.
+ return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
}
RValue<Float4> Floor(RValue<Float4> x)
diff --git a/src/Reactor/x86.hpp b/src/Reactor/x86.hpp
index 038a49d..5e759b3 100644
--- a/src/Reactor/x86.hpp
+++ b/src/Reactor/x86.hpp
@@ -22,8 +22,6 @@
namespace x86
{
RValue<Int> cvtss2si(RValue<Float> val);
- RValue<Int2> cvtps2pi(RValue<Float4> val);
- RValue<Int2> cvttps2pi(RValue<Float4> val);
RValue<Int4> cvtps2dq(RValue<Float4> val);
RValue<Float> rcpss(RValue<Float> val);
@@ -44,26 +42,6 @@
RValue<Float4> floorps(RValue<Float4> val);
RValue<Float4> ceilps(RValue<Float4> val);
- RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm);
- RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y);
- RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y);
- RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y);
- RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y);
- RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y);
- RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y);
- RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y);
- RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y);
-
- RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm);
- RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y);
- RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y);
- RValue<Float> cmpless(RValue<Float> x, RValue<Float> y);
- RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y);
- RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y);
- RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y);
- RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y);
- RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y);
-
RValue<Int4> pabsd(RValue<Int4> x);
RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y);
@@ -75,26 +53,6 @@
RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y);
RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y);
- RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y);
- RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y);
- RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y);
- RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y);
- RValue<Short4> por(RValue<Short4> x, RValue<Short4> y);
- RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y);
- RValue<Short4> pshufw(RValue<Short4> x, unsigned char y);
- RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y);
- RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y);
- RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i);
- RValue<Int> pextrw(RValue<Short4> x, unsigned int i);
- RValue<Short4> punpckldq(RValue<Int2> x, RValue<Int2> y);
- RValue<Short4> punpckhdq(RValue<Int2> x, RValue<Int2> y);
- RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y);
- RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y);
- RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y);
- RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y);
- RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y);
- RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y);
-
RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y);
RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y);
@@ -141,12 +99,10 @@
RValue<Int> movmskps(RValue<Float4> x);
RValue<Int> pmovmskb(RValue<Byte8> x);
- RValue<Int4> pmovzxbd(RValue<Int4> x);
- RValue<Int4> pmovsxbd(RValue<Int4> x);
- RValue<Int4> pmovzxwd(RValue<Int4> x);
- RValue<Int4> pmovsxwd(RValue<Int4> x);
-
- void emms();
+ RValue<Int4> pmovzxbd(RValue<Byte16> x);
+ RValue<Int4> pmovsxbd(RValue<SByte16> x);
+ RValue<Int4> pmovzxwd(RValue<UShort8> x);
+ RValue<Int4> pmovsxwd(RValue<Short8> x);
}
}
diff --git a/src/Renderer/BUILD.gn b/src/Renderer/BUILD.gn
index 301b106..a8ad847 100644
--- a/src/Renderer/BUILD.gn
+++ b/src/Renderer/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_renderer_private_config") {
if (is_win) {
@@ -25,12 +27,10 @@
"-msse2",
"-Wno-sign-compare",
]
-
- defines = [ "LOG_TAG=\"swiftshader_renderer\"" ]
}
}
-source_set("swiftshader_renderer") {
+swiftshader_source_set("swiftshader_renderer") {
deps = [
"../Shader:swiftshader_shader",
]
@@ -55,11 +55,7 @@
"VertexProcessor.cpp",
]
- if (is_win) {
- configs -= [ "//build/config/win:unicode" ]
- }
-
- configs += [ ":swiftshader_renderer_private_config" ]
+ configs = [ ":swiftshader_renderer_private_config" ]
include_dirs = [
".",
diff --git a/src/Renderer/Blitter.cpp b/src/Renderer/Blitter.cpp
index c93b09f..0c4a160 100644
--- a/src/Renderer/Blitter.cpp
+++ b/src/Renderer/Blitter.cpp
@@ -260,11 +260,11 @@
c.w = float(0xFFFF);
break;
case FORMAT_R32I:
- c.x = Float(Int(*Pointer<Int>(element)));
+ c.x = Float(*Pointer<Int>(element));
c.w = float(0x7FFFFFFF);
break;
case FORMAT_R32UI:
- c.x = Float(Int(*Pointer<UInt>(element)));
+ c.x = Float(*Pointer<UInt>(element));
c.w = float(0xFFFFFFFF);
break;
case FORMAT_A8R8G8B8:
@@ -359,13 +359,13 @@
c.w = float(0xFFFF);
break;
case FORMAT_G32R32I:
- c.x = Float(Int(*Pointer<Int>(element + 0)));
- c.y = Float(Int(*Pointer<Int>(element + 4)));
+ c.x = Float(*Pointer<Int>(element + 0));
+ c.y = Float(*Pointer<Int>(element + 4));
c.w = float(0x7FFFFFFF);
break;
case FORMAT_G32R32UI:
- c.x = Float(Int(*Pointer<UInt>(element + 0)));
- c.y = Float(Int(*Pointer<UInt>(element + 4)));
+ c.x = Float(*Pointer<UInt>(element + 0));
+ c.y = Float(*Pointer<UInt>(element + 4));
c.w = float(0xFFFFFFFF);
break;
case FORMAT_A32B32G32R32F:
@@ -835,24 +835,18 @@
c = Insert(c, Int(*Pointer<UShort>(element)), 0);
break;
case FORMAT_A32B32G32R32I:
+ case FORMAT_A32B32G32R32UI:
c = *Pointer<Int4>(element);
break;
case FORMAT_X32B32G32R32I:
+ case FORMAT_X32B32G32R32UI:
c = Insert(c, *Pointer<Int>(element + 8), 2);
case FORMAT_G32R32I:
+ case FORMAT_G32R32UI:
c = Insert(c, *Pointer<Int>(element + 4), 1);
case FORMAT_R32I:
- c = Insert(c, *Pointer<Int>(element), 0);
- break;
- case FORMAT_A32B32G32R32UI:
- c = *Pointer<UInt4>(element);
- break;
- case FORMAT_X32B32G32R32UI:
- c = Insert(c, Int(*Pointer<UInt>(element + 8)), 2);
- case FORMAT_G32R32UI:
- c = Insert(c, Int(*Pointer<UInt>(element + 4)), 1);
case FORMAT_R32UI:
- c = Insert(c, Int(*Pointer<UInt>(element)), 0);
+ c = Insert(c, *Pointer<Int>(element), 0);
break;
default:
return false;
diff --git a/src/Renderer/Context.cpp b/src/Renderer/Context.cpp
index caa4592..e5ee4dc 100644
--- a/src/Renderer/Context.cpp
+++ b/src/Renderer/Context.cpp
@@ -33,6 +33,7 @@
bool fullPixelPositionRegister = false;
bool leadingVertexFirst = false; // Flat shading uses first vertex, else last
bool secondaryColor = false; // Specular lighting is applied after texturing
+ bool colorsDefaultToZero = false;
bool forceWindowed = false;
bool quadLayoutEnabled = false;
diff --git a/src/Renderer/PixelProcessor.cpp b/src/Renderer/PixelProcessor.cpp
index 172e8ef..db11aed 100644
--- a/src/Renderer/PixelProcessor.cpp
+++ b/src/Renderer/PixelProcessor.cpp
@@ -444,6 +444,15 @@
else ASSERT(false);
}
+ void PixelProcessor::setHighPrecisionFiltering(unsigned int sampler, bool highPrecisionFiltering)
+ {
+ if(sampler < TEXTURE_IMAGE_UNITS)
+ {
+ context->sampler[sampler].setHighPrecisionFiltering(highPrecisionFiltering);
+ }
+ else ASSERT(false);
+ }
+
void PixelProcessor::setSwizzleR(unsigned int sampler, SwizzleType swizzleR)
{
if(sampler < TEXTURE_IMAGE_UNITS)
diff --git a/src/Renderer/PixelProcessor.hpp b/src/Renderer/PixelProcessor.hpp
index 94d52d3..dd54b72 100644
--- a/src/Renderer/PixelProcessor.hpp
+++ b/src/Renderer/PixelProcessor.hpp
@@ -231,6 +231,7 @@
void setMipmapLOD(unsigned int sampler, float bias);
void setBorderColor(unsigned int sampler, const Color<float> &borderColor);
void setMaxAnisotropy(unsigned int sampler, float maxAnisotropy);
+ void setHighPrecisionFiltering(unsigned int sampler, bool highPrecisionFiltering);
void setSwizzleR(unsigned int sampler, SwizzleType swizzleR);
void setSwizzleG(unsigned int sampler, SwizzleType swizzleG);
void setSwizzleB(unsigned int sampler, SwizzleType swizzleB);
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp
index a84423d..252d744 100644
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -48,6 +48,7 @@
extern bool fullPixelPositionRegister;
extern bool leadingVertexFirst; // Flat shading uses first vertex, else last
extern bool secondaryColor; // Specular lighting is applied after texturing
+ extern bool colorsDefaultToZero;
extern bool forceWindowed;
extern bool complementaryDepthBuffer;
@@ -110,6 +111,7 @@
sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
sw::leadingVertexFirst = conventions.leadingVertexFirst;
sw::secondaryColor = conventions.secondaryColor;
+ sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
sw::exactColorRounding = exactColorRounding;
setRenderTarget(0, 0);
@@ -670,9 +672,15 @@
}
}
- void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
+ void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
{
- blitter->clear(pixel, format, dest, dRect, rgbaMask);
+ SliceRect rect = clearRect;
+ int samples = dest->getDepth();
+
+ for(rect.slice = 0; rect.slice < samples; rect.slice++)
+ {
+ blitter->clear(value, format, dest, rect, rgbaMask);
+ }
}
void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
@@ -2314,6 +2322,18 @@
}
}
+ void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
+ {
+ if(type == SAMPLER_PIXEL)
+ {
+ PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
+ }
+ else
+ {
+ VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
+ }
+ }
+
void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
{
if(type == SAMPLER_PIXEL)
diff --git a/src/Renderer/Renderer.hpp b/src/Renderer/Renderer.hpp
index c59dd31..d796475 100644
--- a/src/Renderer/Renderer.hpp
+++ b/src/Renderer/Renderer.hpp
@@ -65,6 +65,7 @@
bool fullPixelPositionRegister;
bool leadingVertexFirst;
bool secondaryColor;
+ bool colorsDefaultToZero;
};
static const Conventions OpenGL =
@@ -74,7 +75,8 @@
true, // booleanFaceRegister
true, // fullPixelPositionRegister
false, // leadingVertexFirst
- false // secondaryColor
+ false, // secondaryColor
+ true, // colorsDefaultToZero
};
static const Conventions Direct3D =
@@ -85,6 +87,7 @@
false, // fullPixelPositionRegister
true, // leadingVertexFirst
true, // secondardyColor
+ false, // colorsDefaultToZero
};
struct Query
@@ -323,7 +326,7 @@
void draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update = true);
- void clear(void* pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask);
+ void clear(void *value, Format format, Surface *dest, const Rect &rect, unsigned int rgbaMask);
void blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil = false);
void blit3D(Surface *source, Surface *dest);
@@ -345,6 +348,7 @@
void setMipmapLOD(SamplerType type, int sampler, float bias);
void setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor);
void setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy);
+ void setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering);
void setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR);
void setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG);
void setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB);
diff --git a/src/Renderer/Sampler.cpp b/src/Renderer/Sampler.cpp
index e2447e0..24734da 100644
--- a/src/Renderer/Sampler.cpp
+++ b/src/Renderer/Sampler.cpp
@@ -60,6 +60,7 @@
mipmapFilterState = MIPMAP_NONE;
sRGB = false;
gather = false;
+ highPrecisionFiltering = false;
swizzleR = SWIZZLE_RED;
swizzleG = SWIZZLE_GREEN;
@@ -97,6 +98,7 @@
state.swizzleG = swizzleG;
state.swizzleB = swizzleB;
state.swizzleA = swizzleA;
+ state.highPrecisionFiltering = highPrecisionFiltering;
#if PERF_PROFILE
state.compressedFormat = Surface::isCompressed(externalTextureFormat);
@@ -205,8 +207,15 @@
mipmap.onePitchP[2] = 1;
mipmap.onePitchP[3] = pitchP;
+ mipmap.pitchP[0] = pitchP;
+ mipmap.pitchP[1] = pitchP;
+ mipmap.pitchP[2] = pitchP;
+ mipmap.pitchP[3] = pitchP;
+
mipmap.sliceP[0] = sliceP;
mipmap.sliceP[1] = sliceP;
+ mipmap.sliceP[2] = sliceP;
+ mipmap.sliceP[3] = sliceP;
if(internalTextureFormat == FORMAT_YV12_BT601 ||
internalTextureFormat == FORMAT_YV12_BT709 ||
@@ -298,6 +307,11 @@
texture.maxAnisotropy = maxAnisotropy;
}
+ void Sampler::setHighPrecisionFiltering(bool highPrecisionFiltering)
+ {
+ this->highPrecisionFiltering = highPrecisionFiltering;
+ }
+
void Sampler::setSwizzleR(SwizzleType swizzleR)
{
this->swizzleR = swizzleR;
diff --git a/src/Renderer/Sampler.hpp b/src/Renderer/Sampler.hpp
index 4c4973d..6fae602 100644
--- a/src/Renderer/Sampler.hpp
+++ b/src/Renderer/Sampler.hpp
@@ -36,7 +36,8 @@
short height[4];
short depth[4];
short onePitchP[4];
- int sliceP[2];
+ int4 pitchP;
+ int4 sliceP;
};
struct Texture
@@ -140,6 +141,7 @@
SwizzleType swizzleG : BITS(SWIZZLE_LAST);
SwizzleType swizzleB : BITS(SWIZZLE_LAST);
SwizzleType swizzleA : BITS(SWIZZLE_LAST);
+ bool highPrecisionFiltering : 1;
#if PERF_PROFILE
bool compressedFormat : 1;
@@ -163,6 +165,7 @@
void setReadSRGB(bool sRGB);
void setBorderColor(const Color<float> &borderColor);
void setMaxAnisotropy(float maxAnisotropy);
+ void setHighPrecisionFiltering(bool highPrecisionFiltering);
void setSwizzleR(SwizzleType swizzleR);
void setSwizzleG(SwizzleType swizzleG);
void setSwizzleB(SwizzleType swizzleB);
@@ -202,6 +205,7 @@
MipmapType mipmapFilterState;
bool sRGB;
bool gather;
+ bool highPrecisionFiltering;
SwizzleType swizzleR;
SwizzleType swizzleG;
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index 6615b4b..6bcc657 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -2984,14 +2984,14 @@
case FORMAT_X8B8G8R8I:
case FORMAT_G8R8I:
case FORMAT_R8I:
- case FORMAT_A8B8G8R8UI:
- case FORMAT_X8B8G8R8UI:
- case FORMAT_G8R8UI:
- case FORMAT_R8UI:
case FORMAT_A16B16G16R16I:
case FORMAT_X16B16G16R16I:
case FORMAT_G16R16I:
case FORMAT_R16I:
+ case FORMAT_A32B32G32R32I:
+ case FORMAT_X32B32G32R32I:
+ case FORMAT_G32R32I:
+ case FORMAT_R32I:
return true;
default:
return false;
@@ -3002,14 +3002,14 @@
{
switch(format)
{
+ case FORMAT_A8B8G8R8UI:
+ case FORMAT_X8B8G8R8UI:
+ case FORMAT_G8R8UI:
+ case FORMAT_R8UI:
case FORMAT_A16B16G16R16UI:
case FORMAT_X16B16G16R16UI:
case FORMAT_G16R16UI:
case FORMAT_R16UI:
- case FORMAT_A32B32G32R32I:
- case FORMAT_X32B32G32R32I:
- case FORMAT_G32R32I:
- case FORMAT_R32I:
case FORMAT_A32B32G32R32UI:
case FORMAT_X32B32G32R32UI:
case FORMAT_G32R32UI:
@@ -3193,14 +3193,14 @@
resource->unlock();
}
- bool Surface::isEntire(const SliceRect& rect) const
+ bool Surface::isEntire(const Rect& rect) const
{
return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
}
- SliceRect Surface::getRect() const
+ Rect Surface::getRect() const
{
- return SliceRect(0, 0, internal.width, internal.height, 0);
+ return Rect(0, 0, internal.width, internal.height);
}
void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
diff --git a/src/Renderer/Surface.hpp b/src/Renderer/Surface.hpp
index b54565e..6418c08 100644
--- a/src/Renderer/Surface.hpp
+++ b/src/Renderer/Surface.hpp
@@ -299,8 +299,8 @@
inline int getMultiSampleCount() const;
inline int getSuperSampleCount() const;
- bool isEntire(const SliceRect& rect) const;
- SliceRect getRect() const;
+ bool isEntire(const Rect& rect) const;
+ Rect getRect() const;
void clearDepth(float depth, int x0, int y0, int width, int height);
void clearStencil(unsigned char stencil, unsigned char mask, int x0, int y0, int width, int height);
void fill(const Color<float> &color, int x0, int y0, int width, int height);
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index 91c4d34..6972d94 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -602,6 +602,15 @@
else ASSERT(false);
}
+ void VertexProcessor::setHighPrecisionFiltering(unsigned int sampler, bool highPrecisionFiltering)   // Sets the high-precision filtering flag of one vertex-stage sampler
+ {
+ if(sampler < VERTEX_TEXTURE_IMAGE_UNITS)   // Same bound as the other vertex sampler setters (e.g. setSwizzleR)
+ {
+ context->sampler[TEXTURE_IMAGE_UNITS + sampler].setHighPrecisionFiltering(highPrecisionFiltering);   // NOTE(review): assumes vertex samplers follow the pixel samplers in context->sampler[] - confirm against sibling setters
+ }
+ else ASSERT(false);   // Out-of-range sampler index is a caller error
+ }
+
void VertexProcessor::setSwizzleR(unsigned int sampler, SwizzleType swizzleR)
{
if(sampler < VERTEX_TEXTURE_IMAGE_UNITS)
diff --git a/src/Renderer/VertexProcessor.hpp b/src/Renderer/VertexProcessor.hpp
index 278c9b1..3552f84 100644
--- a/src/Renderer/VertexProcessor.hpp
+++ b/src/Renderer/VertexProcessor.hpp
@@ -258,6 +258,7 @@
void setMipmapLOD(unsigned int sampler, float bias);
void setBorderColor(unsigned int sampler, const Color<float> &borderColor);
void setMaxAnisotropy(unsigned int stage, float maxAnisotropy);
+ void setHighPrecisionFiltering(unsigned int sampler, bool highPrecisionFiltering);
void setSwizzleR(unsigned int sampler, SwizzleType swizzleR);
void setSwizzleG(unsigned int sampler, SwizzleType swizzleG);
void setSwizzleB(unsigned int sampler, SwizzleType swizzleB);
diff --git a/src/Shader/BUILD.gn b/src/Shader/BUILD.gn
index d1323f7..3b19766 100644
--- a/src/Shader/BUILD.gn
+++ b/src/Shader/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_shader_private_config") {
if (is_win) {
@@ -24,12 +26,10 @@
if (is_clang) {
cflags += [ "-Wno-sign-compare" ]
}
- } else {
- defines = [ "LOG_TAG=\"swiftshader_shader\"" ]
}
}
-source_set("swiftshader_shader") {
+swiftshader_source_set("swiftshader_shader") {
deps = [
"../Main:swiftshader_main",
]
@@ -50,7 +50,7 @@
"VertexShader.cpp",
]
- configs += [ ":swiftshader_shader_private_config" ]
+ configs = [ ":swiftshader_shader_private_config" ]
include_dirs = [
".",
diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp
index ecf164f..948f103 100644
--- a/src/Shader/PixelProgram.cpp
+++ b/src/Shader/PixelProgram.cpp
@@ -846,8 +846,8 @@
reg = v[2 + i];
break;
case Shader::PARAMETER_MISCTYPE:
- if(src.index == 0) reg = vPos;
- if(src.index == 1) reg = vFace;
+ if(src.index == Shader::VPosIndex) reg = vPos;
+ if(src.index == Shader::VFaceIndex) reg = vFace;
break;
case Shader::PARAMETER_SAMPLER:
if(src.rel.type == Shader::PARAMETER_VOID)
diff --git a/src/Shader/PixelShader.cpp b/src/Shader/PixelShader.cpp
index 0b78c14..c659248 100644
--- a/src/Shader/PixelShader.cpp
+++ b/src/Shader/PixelShader.cpp
@@ -700,11 +700,11 @@
{
unsigned char index = instruction[i]->dst.index;
- if(index == 0)
+ if(index == Shader::VPosIndex)
{
vPosDeclared = true;
}
- else if(index == 1)
+ else if(index == Shader::VFaceIndex)
{
vFaceDeclared = true;
}
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 1d7ff47..62f76fa 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -50,6 +50,8 @@
namespace sw
{
+ extern bool colorsDefaultToZero;
+
SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler::State &state) : constants(constants), state(state)
{
}
@@ -186,6 +188,7 @@
if(fixed12 && state.textureFilter != FILTER_GATHER)
{
int componentCount = textureComponentCount();
+ short defaultColorValue = colorsDefaultToZero ? 0x0000 : 0x1000;
switch(state.textureFormat)
{
@@ -237,8 +240,8 @@
case FORMAT_YV12_BT601:
case FORMAT_YV12_BT709:
case FORMAT_YV12_JFIF:
- if(componentCount < 2) c.y = Short4(0x1000);
- if(componentCount < 3) c.z = Short4(0x1000);
+ if(componentCount < 2) c.y = Short4(defaultColorValue);
+ if(componentCount < 3) c.z = Short4(defaultColorValue);
if(componentCount < 4) c.w = Short4(0x1000);
break;
case FORMAT_A8:
@@ -259,9 +262,9 @@
c.z = c.x;
break;
case FORMAT_R32F:
- c.y = Short4(0x1000);
+ c.y = Short4(defaultColorValue);
case FORMAT_G32R32F:
- c.z = Short4(0x1000);
+ c.z = Short4(defaultColorValue);
case FORMAT_X32B32G32R32F:
c.w = Short4(0x1000);
case FORMAT_A32B32G32R32F:
@@ -314,7 +317,9 @@
}
else
{
- if(hasFloatTexture()) // FIXME: Mostly identical to integer sampling
+ // FIXME: YUV and sRGB are not supported by the floating point path
+ bool forceFloatFiltering = state.highPrecisionFiltering && !state.sRGB && !hasYuvFormat() && (state.textureFilter != FILTER_POINT);
+ if(hasFloatTexture() || hasUnnormalizedIntegerTexture() || forceFloatFiltering) // FIXME: Mostly identical to integer sampling
{
Float4 uuuu = u;
Float4 vvvv = v;
@@ -353,36 +358,17 @@
}
sampleFloatFilter(texture, c, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
- }
- else
- {
- Vector4s cs;
- sampleTexture(texture, cs, u, v, w, q, dsx, dsy, offset, function, false);
-
- for(int component = 0; component < textureComponentCount(); component++)
+ if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture())
{
if(has16bitTextureFormat())
{
switch(state.textureFormat)
{
case FORMAT_R5G6B5:
- if(state.sRGB)
- {
- sRGBtoLinear16_5_12(cs.x);
- sRGBtoLinear16_6_12(cs.y);
- sRGBtoLinear16_5_12(cs.z);
-
- convertSigned12(c.x, cs.x);
- convertSigned12(c.y, cs.y);
- convertSigned12(c.z, cs.z);
- }
- else
- {
- c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
- c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
- c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
- }
+ c.x *= Float4(1.0f / 0xF800);
+ c.y *= Float4(1.0f / 0xFC00);
+ c.z *= Float4(1.0f / 0xF800);
break;
default:
ASSERT(false);
@@ -390,57 +376,72 @@
}
else
{
- switch(state.textureFormat)
+ for(int component = 0; component < textureComponentCount(); component++)
{
- case FORMAT_R8I:
- case FORMAT_G8R8I:
- case FORMAT_X8B8G8R8I:
- case FORMAT_A8B8G8R8I:
- c[component] = As<Float4>(Int4(cs[component]) >> 8);
- break;
- case FORMAT_R8UI:
- case FORMAT_G8R8UI:
- case FORMAT_X8B8G8R8UI:
- case FORMAT_A8B8G8R8UI:
- c[component] = As<Float4>(Int4(As<UShort4>(cs[component]) >> 8));
- break;
- case FORMAT_R16I:
- case FORMAT_G16R16I:
- case FORMAT_X16B16G16R16I:
- case FORMAT_A16B16G16R16I:
- c[component] = As<Float4>(Int4(cs[component]));
- break;
- case FORMAT_R16UI:
- case FORMAT_G16R16UI:
- case FORMAT_X16B16G16R16UI:
- case FORMAT_A16B16G16R16UI:
- c[component] = As<Float4>(Int4(As<UShort4>(cs[component])));
- break;
- default:
- // Normalized integer formats
- if(state.sRGB && isRGBComponent(component))
+ c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
+ }
+ }
+ }
+ }
+ else
+ {
+ Vector4s cs;
+
+ sampleTexture(texture, cs, u, v, w, q, dsx, dsy, offset, function, false);
+
+ if(has16bitTextureFormat())
+ {
+ switch(state.textureFormat)
+ {
+ case FORMAT_R5G6B5:
+ if(state.sRGB)
+ {
+ sRGBtoLinear16_5_12(cs.x);
+ sRGBtoLinear16_6_12(cs.y);
+ sRGBtoLinear16_5_12(cs.z);
+
+ convertSigned12(c.x, cs.x);
+ convertSigned12(c.y, cs.y);
+ convertSigned12(c.z, cs.z);
+ }
+ else
+ {
+ c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
+ c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
+ c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
+ }
+ break;
+ default:
+ ASSERT(false);
+ }
+ }
+ else
+ {
+ for(int component = 0; component < textureComponentCount(); component++)
+ {
+ // Normalized integer formats
+ if(state.sRGB && isRGBComponent(component))
+ {
+ sRGBtoLinear16_8_12(cs[component]); // FIXME: Perform linearization at surface level for read-only textures
+ convertSigned12(c[component], cs[component]);
+ }
+ else
+ {
+ if(hasUnsignedTextureComponent(component))
{
- sRGBtoLinear16_8_12(cs[component]); // FIXME: Perform linearization at surface level for read-only textures
- convertSigned12(c[component], cs[component]);
+ convertUnsigned16(c[component], cs[component]);
}
else
{
- if(hasUnsignedTextureComponent(component))
- {
- convertUnsigned16(c[component], cs[component]);
- }
- else
- {
- convertSigned15(c[component], cs[component]);
- }
+ convertSigned15(c[component], cs[component]);
}
- break;
}
}
}
}
int componentCount = textureComponentCount();
+ float defaultColorValue = colorsDefaultToZero ? 0.0f : 1.0f;
if(state.textureFilter != FILTER_GATHER)
{
@@ -498,8 +499,8 @@
case FORMAT_YV12_BT601:
case FORMAT_YV12_BT709:
case FORMAT_YV12_JFIF:
- if(componentCount < 2) c.y = Float4(1.0f);
- if(componentCount < 3) c.z = Float4(1.0f);
+ if(componentCount < 2) c.y = Float4(defaultColorValue);
+ if(componentCount < 3) c.z = Float4(defaultColorValue);
if(componentCount < 4) c.w = Float4(1.0f);
break;
case FORMAT_A8:
@@ -520,9 +521,9 @@
c.z = c.x;
break;
case FORMAT_R32F:
- c.y = Float4(1.0f);
+ c.y = Float4(defaultColorValue);
case FORMAT_G32R32F:
- c.z = Float4(1.0f);
+ c.z = Float4(defaultColorValue);
case FORMAT_X32B32G32R32F:
c.w = Float4(1.0f);
case FORMAT_A32B32G32R32F:
@@ -801,24 +802,19 @@
if(state.textureFilter == FILTER_POINT || texelFetch)
{
- sampleTexel(c, uuuu, vvvv, wwww, offset, mipmap, buffer, function);
+ c = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
}
else
{
- Vector4s c0;
- Vector4s c1;
- Vector4s c2;
- Vector4s c3;
-
Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
- sampleTexel(c0, uuuu0, vvvv0, wwww, offset, mipmap, buffer, function);
- sampleTexel(c1, uuuu1, vvvv0, wwww, offset, mipmap, buffer, function);
- sampleTexel(c2, uuuu0, vvvv1, wwww, offset, mipmap, buffer, function);
- sampleTexel(c3, uuuu1, vvvv1, wwww, offset, mipmap, buffer, function);
+ Vector4s c0 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, buffer, function);
+ Vector4s c1 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, buffer, function);
+ Vector4s c2 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, buffer, function);
+ Vector4s c3 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, buffer, function);
if(!gather) // Blend
{
@@ -997,7 +993,7 @@
if(state.textureFilter == FILTER_POINT || texelFetch)
{
- sampleTexel(c_, uuuu, vvvv, wwww, offset, mipmap, buffer, function);
+ c_ = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
}
else
{
@@ -1069,7 +1065,7 @@
{
for(int k = 0; k < 2; k++)
{
- sampleTexel(c[i][j][k], u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function);
+ c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function);
if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); }
if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); }
@@ -1256,39 +1252,36 @@
selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
- bool texelFetch = (function == Fetch);
+ Int4 x0, x1, y0, y1, z0;
+ Float4 fu, fv;
+ Int4 filter = computeFilterOffset(lod);
+ address(w, z0, z0, fv, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
+ address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
+ address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
- Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap);
- Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap);
- Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap);
-
- if(state.textureFilter == FILTER_POINT || texelFetch)
+ Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
+ y0 *= pitchP;
+ if(hasThirdCoordinate())
{
- sampleTexel(c, uuuu, vvvv, wwww, offset, w, mipmap, buffer, function);
+ Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
+ z0 *= sliceP;
+ }
+
+ if(state.textureFilter == FILTER_POINT || (function == Fetch))
+ {
+ c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
}
else
{
- Vector4f c0;
- Vector4f c1;
- Vector4f c2;
- Vector4f c3;
+ y1 *= pitchP;
- Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
- Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
- Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
- Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
-
- sampleTexel(c0, uuuu0, vvvv0, wwww, offset, w, mipmap, buffer, function);
- sampleTexel(c1, uuuu1, vvvv0, wwww, offset, w, mipmap, buffer, function);
- sampleTexel(c2, uuuu0, vvvv1, wwww, offset, w, mipmap, buffer, function);
- sampleTexel(c3, uuuu1, vvvv1, wwww, offset, w, mipmap, buffer, function);
+ Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
+ Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
+ Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
+ Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
if(!gather) // Blend
{
- // Fractions
- Float4 fu = Frac(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth)));
- Float4 fv = Frac(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight)));
-
if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
@@ -1324,47 +1317,35 @@
selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
- bool texelFetch = (function == Fetch);
+ Int4 x0, x1, y0, y1, z0, z1;
+ Float4 fu, fv, fw;
+ Int4 filter = computeFilterOffset(lod);
+ address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
+ address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
+ address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
- Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap);
- Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap);
- Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap);
+ Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
+ Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
+ y0 *= pitchP;
+ z0 *= sliceP;
- if(state.textureFilter == FILTER_POINT || texelFetch)
+ if(state.textureFilter == FILTER_POINT || (function == Fetch))
{
- sampleTexel(c, uuuu, vvvv, wwww, offset, w, mipmap, buffer, function);
+ c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
}
else
{
- Vector4f &c0 = c;
- Vector4f c1;
- Vector4f c2;
- Vector4f c3;
- Vector4f c4;
- Vector4f c5;
- Vector4f c6;
- Vector4f c7;
+ y1 *= pitchP;
+ z1 *= sliceP;
- Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
- Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
- Short4 wwww0 = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, -1, lod);
- Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
- Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
- Short4 wwww1 = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, +1, lod);
-
- sampleTexel(c0, uuuu0, vvvv0, wwww0, offset, w, mipmap, buffer, function);
- sampleTexel(c1, uuuu1, vvvv0, wwww0, offset, w, mipmap, buffer, function);
- sampleTexel(c2, uuuu0, vvvv1, wwww0, offset, w, mipmap, buffer, function);
- sampleTexel(c3, uuuu1, vvvv1, wwww0, offset, w, mipmap, buffer, function);
- sampleTexel(c4, uuuu0, vvvv0, wwww1, offset, w, mipmap, buffer, function);
- sampleTexel(c5, uuuu1, vvvv0, wwww1, offset, w, mipmap, buffer, function);
- sampleTexel(c6, uuuu0, vvvv1, wwww1, offset, w, mipmap, buffer, function);
- sampleTexel(c7, uuuu1, vvvv1, wwww1, offset, w, mipmap, buffer, function);
-
- // Fractions
- Float4 fu = Frac(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth)));
- Float4 fv = Frac(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight)));
- Float4 fw = Frac(Float4(As<UShort4>(wwww0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fDepth)));
+ Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
+ Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
+ Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
+ Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
+ Vector4f c4 = sampleTexel(x0, y0, z1, w, mipmap, buffer, function);
+ Vector4f c5 = sampleTexel(x1, y0, z1, w, mipmap, buffer, function);
+ Vector4f c6 = sampleTexel(x0, y1, z1, w, mipmap, buffer, function);
+ Vector4f c7 = sampleTexel(x1, y1, z1, w, mipmap, buffer, function);
// Blend first slice
if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
@@ -1399,13 +1380,23 @@
if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w);
// Blend slices
- if(componentCount >= 1) c0.x = c0.x + fw * (c4.x - c0.x);
- if(componentCount >= 2) c0.y = c0.y + fw * (c4.y - c0.y);
- if(componentCount >= 3) c0.z = c0.z + fw * (c4.z - c0.z);
- if(componentCount >= 4) c0.w = c0.w + fw * (c4.w - c0.w);
+ if(componentCount >= 1) c.x = c0.x + fw * (c4.x - c0.x);
+ if(componentCount >= 2) c.y = c0.y + fw * (c4.y - c0.y);
+ if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z);
+ if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w);
}
}
+ Float SamplerCore::log2sqrt(Float lod)
+ {
+ // log2(sqrt(lod)) // Equals 0.25 * log2(lod^2).
+ lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision.
+ lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias.
+ lod *= As<Float>(Int(0x33000000)); // Scale by 0.25 * 2^-23 (mantissa length).
+
+ return lod;
+ }
+
void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
{
if(function != Lod && function != Fetch)
@@ -1451,10 +1442,7 @@
lod *= Rcp_pp(anisotropy * anisotropy);
}
- // log2(sqrt(lod))
- lod = Float(As<Int>(lod));
- lod -= Float(0x3F800000);
- lod *= As<Float>(Int(0x33800000));
+ lod = log2sqrt(lod); // log2(sqrt(lod))
if(function == Bias)
{
@@ -1510,10 +1498,7 @@
lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis
}
- // log2(sqrt(lod))
- lod = Float(As<Int>(lod));
- lod -= Float(0x3F800000);
- lod *= As<Float>(Int(0x33800000));
+ lod = log2sqrt(lod); // log2(sqrt(lod))
if(function == Bias)
{
@@ -1577,10 +1562,7 @@
lod = Max(Float(dudxy.x), Float(dudxy.y)); // FIXME: Max(dudxy.x, dudxy.y);
- // log2(sqrt(lod))
- lod = Float(As<Int>(lod));
- lod -= Float(0x3F800000);
- lod *= As<Float>(Int(0x33800000));
+ lod = log2sqrt(lod); // log2(sqrt(lod))
if(function == Bias)
{
@@ -1700,7 +1682,7 @@
uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
- if((state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY))
+ if(hasThirdCoordinate())
{
if(state.textureType != TEXTURE_2D_ARRAY)
{
@@ -1730,7 +1712,7 @@
if(texelFetch)
{
Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP)));
- if((state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY))
+ if(hasThirdCoordinate())
{
size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)));
}
@@ -1744,11 +1726,24 @@
}
}
- void SamplerCore::sampleTexel(Vector4s &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
+ void SamplerCore::computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function)
{
- UInt index[4];
+ UInt4 indices = uuuu + vvvv;
- computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
+ if(hasThirdCoordinate())
+ {
+ indices += As<UInt4>(wwww);
+ }
+
+ for(int i = 0; i < 4; i++)
+ {
+ index[i] = Extract(As<Int4>(indices), i);
+ }
+ }
+
+ Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4])
+ {
+ Vector4s c;
int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
@@ -1789,8 +1784,7 @@
switch(state.textureFormat)
{
case FORMAT_A8R8G8B8:
- c.z = c.x;
- c.z = As<Short4>(UnpackLow(c.z, c.y));
+ c.z = As<Short4>(UnpackLow(c.x, c.y));
c.x = As<Short4>(UnpackHigh(c.x, c.y));
c.y = c.z;
c.w = c.x;
@@ -1801,19 +1795,35 @@
break;
case FORMAT_A8B8G8R8:
case FORMAT_A8B8G8R8I:
- case FORMAT_A8B8G8R8UI:
case FORMAT_A8B8G8R8I_SNORM:
case FORMAT_Q8W8V8U8:
case FORMAT_SRGB8_A8:
- c.z = c.x;
+ c.z = As<Short4>(UnpackHigh(c.x, c.y));
c.x = As<Short4>(UnpackLow(c.x, c.y));
- c.z = As<Short4>(UnpackHigh(c.z, c.y));
c.y = c.x;
c.w = c.z;
c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
+ // Propagate sign bit
+ if(state.textureFormat == FORMAT_A8B8G8R8I)
+ {
+ c.x >>= 8;
+ c.y >>= 8;
+ c.z >>= 8;
+ c.w >>= 8;
+ }
+ break;
+ case FORMAT_A8B8G8R8UI:
+ c.z = As<Short4>(UnpackHigh(c.x, c.y));
+ c.x = As<Short4>(UnpackLow(c.x, c.y));
+ c.y = c.x;
+ c.w = c.z;
+ c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
+ c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
+ c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
+ c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
break;
default:
ASSERT(false);
@@ -1832,8 +1842,7 @@
switch(state.textureFormat)
{
case FORMAT_X8R8G8B8:
- c.z = c.x;
- c.z = As<Short4>(UnpackLow(c.z, c.y));
+ c.z = As<Short4>(UnpackLow(c.x, c.y));
c.x = As<Short4>(UnpackHigh(c.x, c.y));
c.y = c.z;
c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
@@ -1841,18 +1850,31 @@
c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
break;
case FORMAT_X8B8G8R8I_SNORM:
- case FORMAT_X8B8G8R8UI:
case FORMAT_X8B8G8R8I:
case FORMAT_X8B8G8R8:
case FORMAT_X8L8V8U8:
case FORMAT_SRGB8_X8:
- c.z = c.x;
+ c.z = As<Short4>(UnpackHigh(c.x, c.y));
c.x = As<Short4>(UnpackLow(c.x, c.y));
- c.z = As<Short4>(UnpackHigh(c.z, c.y));
c.y = c.x;
c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
+ // Propagate sign bit
+ if(state.textureFormat == FORMAT_X8B8G8R8I)
+ {
+ c.x >>= 8;
+ c.y >>= 8;
+ c.z >>= 8;
+ }
+ break;
+ case FORMAT_X8B8G8R8UI:
+ c.z = As<Short4>(UnpackHigh(c.x, c.y));
+ c.x = As<Short4>(UnpackLow(c.x, c.y));
+ c.y = c.x;
+ c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
+ c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
+ c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
break;
default:
ASSERT(false);
@@ -1868,14 +1890,20 @@
switch(state.textureFormat)
{
case FORMAT_G8R8:
- case FORMAT_G8R8I:
- case FORMAT_G8R8UI:
case FORMAT_G8R8I_SNORM:
case FORMAT_V8U8:
case FORMAT_A8L8:
c.y = (c.x & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8);
c.x = (c.x & Short4(0x00FFu)) | (c.x << 8);
break;
+ case FORMAT_G8R8I:
+ c.y = c.x >> 8;
+ c.x = (c.x << 8) >> 8; // Propagate sign bit
+ break;
+ case FORMAT_G8R8UI:
+ c.y = As<Short4>(As<UShort4>(c.x) >> 8);
+ c.x &= Short4(0x00FFu);
+ break;
default:
ASSERT(false);
}
@@ -1887,7 +1915,25 @@
Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
- c.x = Unpack(As<Byte4>(c0));
+
+ switch(state.textureFormat)
+ {
+ case FORMAT_R8I:
+ case FORMAT_R8UI:
+ {
+ Int zero(0);
+ c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero));
+ // Propagate sign bit
+ if(state.textureFormat == FORMAT_R8I)
+ {
+ c.x = (c.x << 8) >> 8;
+ }
+ }
+ break;
+ default:
+ c.x = Unpack(As<Byte4>(c0));
+ break;
+ }
}
break;
default:
@@ -1924,7 +1970,19 @@
ASSERT(false);
}
}
- else if(hasYuvFormat())
+ else ASSERT(false);
+
+ return c;
+ }
+
+ Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
+ {
+ Vector4s c;
+
+ UInt index[4];
+ computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
+
+ if(hasYuvFormat())
{
// Generic YPbPr to RGB transformation
// R = Y + 2 * (1 - Kr) * Pr
@@ -2018,66 +2076,111 @@
c.y = Min(g, UShort4(0x3FFF)) << 2;
c.z = Min(b, UShort4(0x3FFF)) << 2;
}
- else ASSERT(false);
+ else
+ {
+ return sampleTexel(index, buffer);
+ }
+
+ return c;
}
- void SamplerCore::sampleTexel(Vector4f &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
+ Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
{
+ Vector4f c;
+
UInt index[4];
+ computeIndices(index, uuuu, vvvv, wwww, mipmap, function);
- computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
-
- int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
- int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
- int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
- int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
-
- // Read texels
- switch(textureComponentCount())
+ if(hasFloatTexture() || has32bitIntegerTextureComponents())
{
- case 4:
- c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
- c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
- c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
- c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
- transpose4x4(c.x, c.y, c.z, c.w);
- break;
- case 3:
- ASSERT(state.textureFormat == FORMAT_X32B32G32R32F);
- c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
- c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
- c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
- c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
- transpose4x3(c.x, c.y, c.z, c.w);
- c.w = Float4(1.0f);
- break;
- case 2:
- // FIXME: Optimal shuffling?
- c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
- c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
- c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
- c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
- c.y = c.x;
- c.x = Float4(c.x.xz, c.z.xz);
- c.y = Float4(c.y.yw, c.z.yw);
- break;
- case 1:
- // FIXME: Optimal shuffling?
- c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
- c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
- c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
- c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
+ int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
+ int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
+ int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
+ int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
- if(state.textureFormat == FORMAT_D32FS8_SHADOW && state.textureFilter != FILTER_GATHER)
+ // Read texels
+ switch(textureComponentCount())
{
- Float4 d = Min(Max(z, Float4(0.0f)), Float4(1.0f));
+ case 4:
+ c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
+ c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
+ c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
+ c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
+ transpose4x4(c.x, c.y, c.z, c.w);
+ break;
+ case 3:
+ ASSERT(state.textureFormat == FORMAT_X32B32G32R32F);
+ c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
+ c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
+ c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
+ c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
+ transpose4x3(c.x, c.y, c.z, c.w);
+ c.w = Float4(1.0f);
+ break;
+ case 2:
+ // FIXME: Optimal shuffling?
+ c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
+ c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
+ c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
+ c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
+ c.y = c.x;
+ c.x = Float4(c.x.xz, c.z.xz);
+ c.y = Float4(c.y.yw, c.z.yw);
+ break;
+ case 1:
+ // FIXME: Optimal shuffling?
+ c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
+ c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
+ c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
+ c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
- c.x = As<Float4>(As<Int4>(CmpNLT(c.x, d)) & As<Int4>(Float4(1.0f))); // FIXME: Only less-equal?
+ if(state.textureFormat == FORMAT_D32FS8_SHADOW && state.textureFilter != FILTER_GATHER)
+ {
+ Float4 d = Min(Max(z, Float4(0.0f)), Float4(1.0f));
+
+ c.x = As<Float4>(As<Int4>(CmpNLT(c.x, d)) & As<Int4>(Float4(1.0f))); // FIXME: Only less-equal?
+ }
+ break;
+ default:
+ ASSERT(false);
}
- break;
- default:
- ASSERT(false);
}
+ else
+ {
+ ASSERT(!hasYuvFormat());
+
+ Vector4s cs = sampleTexel(index, buffer);
+
+ bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat);
+ int componentCount = textureComponentCount();
+ for(int n = 0; n < componentCount; ++n)
+ {
+ if(hasUnsignedTextureComponent(n))
+ {
+ if(isInteger)
+ {
+ c[n] = As<Float4>(Int4(As<UShort4>(cs[n])));
+ }
+ else
+ {
+ c[n] = Float4(As<UShort4>(cs[n]));
+ }
+ }
+ else
+ {
+ if(isInteger)
+ {
+ c[n] = As<Float4>(Int4(cs[n]));
+ }
+ else
+ {
+ c[n] = Float4(cs[n]);
+ }
+ }
+ }
+ }
+
+ return c;
}
void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
@@ -2121,6 +2224,21 @@
}
}
+ Int4 SamplerCore::computeFilterOffset(Float &lod) // Returns 0 or 1 per lane, selected by the sampler's filter mode and the sign of lod; presumably the 'filter' step passed to address() - TODO confirm at the call site
+ {
+ Int4 filtering((state.textureFilter == FILTER_POINT) ? 0 : 1); // Point filtering starts at 0, every other filter mode starts at 1
+ if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
+ {
+ filtering &= CmpNLE(Float4(lod), Float4(0.0f)); // Keep the 1 only where lod is not <= 0 (presumably the minification case)
+ }
+ else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
+ {
+ filtering &= CmpLE(Float4(lod), Float4(0.0f)); // Keep the 1 only where lod <= 0 (presumably the magnification case)
+ }
+
+ return filtering;
+ }
+
Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap)
{
if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
@@ -2163,6 +2281,119 @@
}
}
+ void SamplerCore::address(Float4 &uvw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function) // Integer addressing along the axis whose size is stored at mipmap + whd: converts coordinate uvw into texel index xyz0 and, when filtering, a second index xyz1 and f = |frac(coord) - 0.5|
+ { // xyz1 is written only on the filtered path (not Fetch, not an array layer, filter != POINT) and f only when that path is not gather; otherwise both are left untouched
+ if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
+ {
+ return; // Unused
+ }
+
+ Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16)); // Size along this axis, loaded as four 16-bit values at byte offset whd and converted to Int4
+ Int4 maxXYZ = dim - Int4(1); // Largest valid texel index
+
+ if(function == Fetch) // Fetch: uvw and texOffset hold integer values as raw bits (As<> reinterprets, it does not convert)
+ {
+ xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ); // Add the offset when requested (never for array layers), then clamp to [0, dim - 1]
+ }
+ else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY) // Note: Offset does not apply to array layers
+ {
+ xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ); // Round to an integer layer index and clamp to the valid range
+ }
+ else
+ {
+ const int halfBits = 0x3effffff; // Value just under 0.5f
+ const int oneBits = 0x3f7fffff; // Value just under 1.0f
+ const int twoBits = 0x3fffffff; // Value just under 2.0f
+
+ Float4 coord = Float4(dim); // Scaled below by the addressed coordinate to give a texel-space position
+ switch(addressingMode)
+ {
+ case ADDRESSING_CLAMP:
+ {
+ Float4 one = As<Float4>(Int4(oneBits));
+ coord *= Min(Max(uvw, Float4(0.0f)), one); // Clamp to [0, just under 1], presumably so coord stays below dim
+ }
+ break;
+ case ADDRESSING_MIRROR:
+ {
+ Float4 half = As<Float4>(Int4(halfBits));
+ Float4 one = As<Float4>(Int4(oneBits));
+ Float4 two = As<Float4>(Int4(twoBits));
+ coord *= one - Abs(two * Frac(uvw * half) - one); // Triangle wave in uvw with a period of about 2
+ }
+ break;
+ case ADDRESSING_MIRRORONCE:
+ {
+ Float4 half = As<Float4>(Int4(halfBits));
+ Float4 one = As<Float4>(Int4(oneBits));
+ Float4 two = As<Float4>(Int4(twoBits));
+ coord *= one - Abs(two * Frac(Min(Max(uvw, -one), two) * half) - one); // Same triangle wave, with uvw first clamped to about [-1, 2]
+ }
+ break;
+ default: // Wrap (or border)
+ coord *= Frac(uvw); // Keep only the fractional part of uvw
+ break;
+ }
+
+ xyz0 = Int4(coord); // Convert the texel-space position to an integer index
+
+ if(function.option == Offset)
+ {
+ xyz0 += As<Int4>(texOffset); // texOffset carries integer offsets as raw bits
+ switch(addressingMode)
+ {
+ case ADDRESSING_MIRROR:
+ case ADDRESSING_MIRRORONCE:
+ case ADDRESSING_BORDER:
+ // FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp.
+ case ADDRESSING_CLAMP:
+ xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
+ break;
+ default: // Wrap
+ xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim; // Adds a multiple of dim before the modulo, presumably to keep the dividend non-negative (assumes MIN_PROGRAM_TEXEL_OFFSET <= 0 - TODO confirm)
+ break;
+ }
+ }
+
+ if(state.textureFilter != FILTER_POINT) // Compute 2nd coordinate, if needed
+ {
+ bool gather = state.textureFilter == FILTER_GATHER;
+
+ xyz1 = xyz0 + filter; // Increment
+
+ if(!gather)
+ {
+ Float4 frac = Frac(coord);
+ f = Abs(frac - Float4(0.5f)); // Distance of the fractional position from 0.5
+ xyz1 -= CmpLT(frac, Float4(0.5f)) & (filter + filter); // Decrement xyz if necessary
+ }
+
+ switch(addressingMode)
+ {
+ case ADDRESSING_MIRROR:
+ case ADDRESSING_MIRRORONCE:
+ case ADDRESSING_BORDER:
+ // FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp.
+ case ADDRESSING_CLAMP:
+ xyz1 = gather ? Min(xyz1, maxXYZ) : Min(Max(xyz1, Int4(0)), maxXYZ); // Gather skips the decrement above, so only its upper bound is clamped here
+ break;
+ default: // Wrap
+ {
+ // The coordinates overflow or underflow by at most 1
+ Int4 over = CmpNLT(xyz1, dim);
+ xyz1 = (over & Int4(0)) | (~over & xyz1); // xyz >= dim ? 0 : xyz
+ if(!gather)
+ {
+ Int4 under = CmpLT(xyz1, Int4(0));
+ xyz1 = (under & maxXYZ) | (~under & xyz1); // xyz < 0 ? dim - 1 : xyz
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+
void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf)
{
cs = RoundShort4(cf * Float4(0x1000));
@@ -2240,6 +2471,11 @@
return Surface::isFloatFormat(state.textureFormat);
}
+ bool SamplerCore::hasUnnormalizedIntegerTexture() const // True when Surface::isNonNormalizedInteger() reports the sampler's texture format as a non-normalized integer format
+ {
+ return Surface::isNonNormalizedInteger(state.textureFormat);
+ }
+
bool SamplerCore::hasUnsignedTextureComponent(int component) const
{
return Surface::isUnsignedComponent(state.textureFormat, component);
@@ -2250,6 +2486,11 @@
return Surface::componentCount(state.textureFormat);
}
+ bool SamplerCore::hasThirdCoordinate() const // True for 3D and 2D-array textures; the Int4 computeIndices() overload adds the w term to the texel index only when this holds
+ {
+ return (state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY);
+ }
+
bool SamplerCore::has16bitTextureFormat() const
{
switch(state.textureFormat)
@@ -2469,6 +2710,79 @@
return false;
}
+ bool SamplerCore::has32bitIntegerTextureComponents() const // True only for the 32-bit signed/unsigned integer formats (R32I ... A32B32G32R32UI); every other handled format is listed explicitly and yields false
+ {
+ switch(state.textureFormat)
+ {
+ case FORMAT_R5G6B5:
+ case FORMAT_R8I_SNORM:
+ case FORMAT_G8R8I_SNORM:
+ case FORMAT_X8B8G8R8I_SNORM:
+ case FORMAT_A8B8G8R8I_SNORM:
+ case FORMAT_R8I:
+ case FORMAT_R8UI:
+ case FORMAT_G8R8I:
+ case FORMAT_G8R8UI:
+ case FORMAT_X8B8G8R8I:
+ case FORMAT_X8B8G8R8UI:
+ case FORMAT_A8B8G8R8I:
+ case FORMAT_A8B8G8R8UI:
+ case FORMAT_G8R8:
+ case FORMAT_X8R8G8B8:
+ case FORMAT_X8B8G8R8:
+ case FORMAT_A8R8G8B8:
+ case FORMAT_A8B8G8R8:
+ case FORMAT_SRGB8_X8:
+ case FORMAT_SRGB8_A8:
+ case FORMAT_V8U8:
+ case FORMAT_Q8W8V8U8:
+ case FORMAT_X8L8V8U8:
+ case FORMAT_L16:
+ case FORMAT_G16R16:
+ case FORMAT_A16B16G16R16:
+ case FORMAT_R16I:
+ case FORMAT_R16UI:
+ case FORMAT_G16R16I:
+ case FORMAT_G16R16UI:
+ case FORMAT_X16B16G16R16I:
+ case FORMAT_X16B16G16R16UI:
+ case FORMAT_A16B16G16R16I:
+ case FORMAT_A16B16G16R16UI:
+ case FORMAT_V16U16:
+ case FORMAT_A16W16V16U16:
+ case FORMAT_Q16W16V16U16:
+ case FORMAT_R32F: // The 32-bit float formats are not integer formats, so they sit in the false group
+ case FORMAT_G32R32F:
+ case FORMAT_X32B32G32R32F:
+ case FORMAT_A32B32G32R32F:
+ case FORMAT_A8:
+ case FORMAT_R8:
+ case FORMAT_L8:
+ case FORMAT_A8L8:
+ case FORMAT_D32F:
+ case FORMAT_D32F_LOCKABLE:
+ case FORMAT_D32FS8_TEXTURE:
+ case FORMAT_D32FS8_SHADOW:
+ case FORMAT_YV12_BT601:
+ case FORMAT_YV12_BT709:
+ case FORMAT_YV12_JFIF:
+ return false;
+ case FORMAT_R32I:
+ case FORMAT_R32UI:
+ case FORMAT_G32R32I:
+ case FORMAT_G32R32UI:
+ case FORMAT_X32B32G32R32I:
+ case FORMAT_X32B32G32R32UI:
+ case FORMAT_A32B32G32R32I:
+ case FORMAT_A32B32G32R32UI:
+ return true;
+ default:
+ ASSERT(false); // Format missing from both lists above
+ }
+
+ return false; // Reached only through the default case, i.e. for a format that is not listed
+ }
+
bool SamplerCore::hasYuvFormat() const
{
switch(state.textureFormat)
diff --git a/src/Shader/SamplerCore.hpp b/src/Shader/SamplerCore.hpp
index 0f90cac..9f8e85b 100644
--- a/src/Shader/SamplerCore.hpp
+++ b/src/Shader/SamplerCore.hpp
@@ -69,16 +69,22 @@
void sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
void sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
void sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
+ Float log2sqrt(Float lod);
void computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
void computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &x, Float4 &y, Float4 &z, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
void computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
void cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodX, Float4 &lodY, Float4 &lodZ, Float4 &x, Float4 &y, Float4 &z);
Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function);
- void sampleTexel(Vector4s &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
- void sampleTexel(Vector4f &c, Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
+ void computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function);
+ Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
+ Vector4f sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
+ Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer[4]);
+ Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
void selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD);
Short4 address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap);
+ void address(Float4 &uw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function);
+ Int4 computeFilterOffset(Float &lod);
void convertFixed12(Short4 &ci, Float4 &cf);
void convertFixed12(Vector4s &cs, Vector4f &cf);
@@ -90,11 +96,14 @@
void sRGBtoLinear16_5_12(Short4 &c);
bool hasFloatTexture() const;
+ bool hasUnnormalizedIntegerTexture() const;
bool hasUnsignedTextureComponent(int component) const;
int textureComponentCount() const;
+ bool hasThirdCoordinate() const;
bool has16bitTextureFormat() const;
bool has8bitTextureComponents() const;
bool has16bitTextureComponents() const;
+ bool has32bitIntegerTextureComponents() const;
bool hasYuvFormat() const;
bool isRGBComponent(int component) const;
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp
index c861069..ff1482e 100644
--- a/src/Shader/Shader.cpp
+++ b/src/Shader/Shader.cpp
@@ -1059,9 +1059,14 @@
case PARAMETER_LOOP: return "aL";
// case PARAMETER_TEMPFLOAT16: return "";
case PARAMETER_MISCTYPE:
- if(index == 0) return "vPos";
- else if(index == 1) return "vFace";
- else ASSERT(false);
+ switch(index)
+ {
+ case VPosIndex: return "vPos";
+ case VFaceIndex: return "vFace";
+ case InstanceIDIndex: return "iID";
+ case VertexIDIndex: return "vID";
+ default: ASSERT(false);
+ }
case PARAMETER_LABEL: return "l";
case PARAMETER_PREDICATE: return "p0";
case PARAMETER_FLOAT4LITERAL: return "";
diff --git a/src/Shader/Shader.hpp b/src/Shader/Shader.hpp
index f41d514..ee69e8b 100644
--- a/src/Shader/Shader.hpp
+++ b/src/Shader/Shader.hpp
@@ -358,6 +358,14 @@
PARAMETER_VOID
};
+ enum MiscParameterIndex // Register index values for parameters of type PARAMETER_MISCTYPE
+ {
+ VPosIndex = 0, // Printed as "vPos" by the parameter-to-string code in Shader.cpp
+ VFaceIndex = 1, // Printed as "vFace"
+ InstanceIDIndex = 2, // Printed as "iID"; VertexProgram maps it to instanceID
+ VertexIDIndex = 3, // Printed as "vID"; VertexProgram maps it to vertexID
+ };
+
enum Modifier
{
MODIFIER_NONE,
diff --git a/src/Shader/VertexPipeline.cpp b/src/Shader/VertexPipeline.cpp
index 8db3ca0..8792884 100644
--- a/src/Shader/VertexPipeline.cpp
+++ b/src/Shader/VertexPipeline.cpp
@@ -158,7 +158,7 @@
return dst;
}
- void VertexPipeline::pipeline()
+ void VertexPipeline::pipeline(UInt &index)
{
Vector4f position;
Vector4f normal;
diff --git a/src/Shader/VertexPipeline.hpp b/src/Shader/VertexPipeline.hpp
index e8b954c..e3c0cbe 100644
--- a/src/Shader/VertexPipeline.hpp
+++ b/src/Shader/VertexPipeline.hpp
@@ -30,7 +30,7 @@
virtual ~VertexPipeline();
private:
- void pipeline() override;
+ void pipeline(UInt &index) override;
void processTextureCoordinate(int stage, Vector4f &normal, Vector4f &position);
void processPointSize();
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index 26d61e0..c9ed8aa 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -64,7 +64,7 @@
}
}
- void VertexProgram::pipeline()
+ void VertexProgram::pipeline(UInt& index)
{
for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
{
@@ -73,7 +73,7 @@
if(!state.preTransformed)
{
- program();
+ program(index);
}
else
{
@@ -81,7 +81,7 @@
}
}
- void VertexProgram::program()
+ void VertexProgram::program(UInt& index)
{
// shader->print("VertexShader-%0.8X.txt", state.shaderID);
@@ -95,6 +95,21 @@
enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
}
+ if(shader->isVertexIdDeclared())
+ {
+ if(state.textureSampling)
+ {
+ vertexID = Int4(index);
+ }
+ else
+ {
+ vertexID = Insert(vertexID, As<Int>(index), 0);
+ vertexID = Insert(vertexID, As<Int>(index + 1), 1);
+ vertexID = Insert(vertexID, As<Int>(index + 2), 2);
+ vertexID = Insert(vertexID, As<Int>(index + 3), 3);
+ }
+ }
+
// Create all call site return blocks up front
for(size_t i = 0; i < shader->getLength(); i++)
{
@@ -721,7 +736,15 @@
}
break;
case Shader::PARAMETER_MISCTYPE:
- reg.x = As<Float>(Int(instanceID));
+ if(src.index == Shader::InstanceIDIndex)
+ {
+ reg.x = As<Float>(instanceID);
+ }
+ else if(src.index == Shader::VertexIDIndex)
+ {
+ reg.x = As<Float4>(vertexID);
+ }
+ else ASSERT(false);
return reg;
default:
ASSERT(false);
@@ -861,7 +884,17 @@
case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break;
case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break;
case Shader::PARAMETER_CONST: a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
- case Shader::PARAMETER_MISCTYPE: a = As<Float4>(Int4(instanceID)); break;
+ case Shader::PARAMETER_MISCTYPE:
+ if(src.rel.index == Shader::InstanceIDIndex)
+ {
+ a = As<Float4>(Int4(instanceID)); break;
+ }
+ else if(src.rel.index == Shader::VertexIDIndex)
+ {
+ a = As<Float4>(vertexID); break;
+ }
+ else ASSERT(false);
+ break;
default: ASSERT(false);
}
diff --git a/src/Shader/VertexProgram.hpp b/src/Shader/VertexProgram.hpp
index bcf4a20..b537af3 100644
--- a/src/Shader/VertexProgram.hpp
+++ b/src/Shader/VertexProgram.hpp
@@ -56,14 +56,15 @@
Int4 enableLeave;
Int instanceID;
+ Int4 vertexID;
typedef Shader::DestinationParameter Dst;
typedef Shader::SourceParameter Src;
typedef Shader::Control Control;
typedef Shader::Usage Usage;
- void pipeline() override;
- void program();
+ void pipeline(UInt &index) override;
+ void program(UInt &index);
void passThrough();
Vector4f fetchRegister(const Src &src, unsigned int offset = 0);
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index 42faa80..0f1ccdf 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -62,7 +62,7 @@
*Pointer<UInt>(tagCache + tagIndex) = indexQ;
readInput(indexQ);
- pipeline();
+ pipeline(indexQ);
postTransform();
computeClipFlags();
diff --git a/src/Shader/VertexRoutine.hpp b/src/Shader/VertexRoutine.hpp
index dd4bf13..905118b 100644
--- a/src/Shader/VertexRoutine.hpp
+++ b/src/Shader/VertexRoutine.hpp
@@ -54,7 +54,7 @@
const VertexProcessor::State &state;
private:
- virtual void pipeline() = 0;
+ virtual void pipeline(UInt &index) = 0;
typedef VertexProcessor::State::Input Stream;
diff --git a/src/Shader/VertexShader.cpp b/src/Shader/VertexShader.cpp
index a98932b..361c76f 100644
--- a/src/Shader/VertexShader.cpp
+++ b/src/Shader/VertexShader.cpp
@@ -27,6 +27,7 @@
positionRegister = Pos;
pointSizeRegister = Unused;
instanceIdDeclared = false;
+ vertexIdDeclared = false;
textureSampling = false;
for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
@@ -48,6 +49,7 @@
positionRegister = vs->positionRegister;
pointSizeRegister = vs->pointSizeRegister;
instanceIdDeclared = vs->instanceIdDeclared;
+ vertexIdDeclared = vs->vertexIdDeclared;
usedSamplers = vs->usedSamplers;
optimize();
@@ -62,6 +64,7 @@
positionRegister = Pos;
pointSizeRegister = Unused;
instanceIdDeclared = false;
+ vertexIdDeclared = false;
textureSampling = false;
for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
diff --git a/src/Shader/VertexShader.hpp b/src/Shader/VertexShader.hpp
index 0ca7b93..9a9a0a6 100644
--- a/src/Shader/VertexShader.hpp
+++ b/src/Shader/VertexShader.hpp
@@ -45,6 +45,7 @@
void setPositionRegister(int posReg);
void setPointSizeRegister(int ptSizeReg);
void declareInstanceId() { instanceIdDeclared = true; }
+ void declareVertexId() { vertexIdDeclared = true; } // Sets the flag that VertexProgram::program() checks before initializing vertexID
const Semantic& getInput(int inputIdx) const;
const Semantic& getOutput(int outputIdx, int component) const;
@@ -52,6 +53,7 @@
int getPositionRegister() const { return positionRegister; }
int getPointSizeRegister() const { return pointSizeRegister; }
bool isInstanceIdDeclared() const { return instanceIdDeclared; }
+ bool isVertexIdDeclared() const { return vertexIdDeclared; } // True once declareVertexId() was called, or when copied from a shader for which it was
private:
void analyze();
@@ -68,6 +70,7 @@
int pointSizeRegister;
bool instanceIdDeclared;
+ bool vertexIdDeclared;
bool textureSampling;
};
}
diff --git a/src/SwiftShader/SwiftShader.vcxproj b/src/SwiftShader/SwiftShader.vcxproj
index 1ab5865..77ab670 100644
--- a/src/SwiftShader/SwiftShader.vcxproj
+++ b/src/SwiftShader/SwiftShader.vcxproj
@@ -118,7 +118,7 @@
<FavorSizeOrSpeed>Neither</FavorSizeOrSpeed>
<OmitFramePointers>false</OmitFramePointers>
<AdditionalIncludeDirectories>..\;..\Main;..\Renderer;..\Shader;..\Common;..\SwiftAsm;..\libjpeg;..\SwiftShader;..\D3D9;..\Reactor;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_HAS_EXCEPTIONS=0;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NO_SANITIZE_FUNCTION=;_DEBUG;_LIB;_HAS_EXCEPTIONS=0;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<ExceptionHandling>false</ExceptionHandling>
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
@@ -153,7 +153,7 @@
<FavorSizeOrSpeed>Neither</FavorSizeOrSpeed>
<OmitFramePointers>false</OmitFramePointers>
<AdditionalIncludeDirectories>..\;..\Main;..\Renderer;..\Shader;..\Common;..\SwiftAsm;..\libjpeg;..\SwiftShader;..\D3D9;..\Reactor;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_HAS_EXCEPTIONS=0;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NO_SANITIZE_FUNCTION=;_DEBUG;_LIB;_HAS_EXCEPTIONS=0;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<ExceptionHandling>false</ExceptionHandling>
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
@@ -186,7 +186,7 @@
<OmitFramePointers>true</OmitFramePointers>
<WholeProgramOptimization>true</WholeProgramOptimization>
<AdditionalIncludeDirectories>..\;..\Main;..\Renderer;..\Shader;..\Common;..\SwiftAsm;..\libjpeg;..\SwiftShader;..\D3D9;..\Reactor;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NO_SANITIZE_FUNCTION=;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ExceptionHandling>false</ExceptionHandling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
@@ -221,7 +221,7 @@
<OmitFramePointers>false</OmitFramePointers>
<WholeProgramOptimization>false</WholeProgramOptimization>
<AdditionalIncludeDirectories>..\;..\Main;..\Renderer;..\Shader;..\Common;..\SwiftAsm;..\libjpeg;..\SwiftShader;..\D3D9;..\Reactor;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NO_SANITIZE_FUNCTION=;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ExceptionHandling>false</ExceptionHandling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
@@ -257,7 +257,7 @@
<OmitFramePointers>true</OmitFramePointers>
<WholeProgramOptimization>true</WholeProgramOptimization>
<AdditionalIncludeDirectories>..\;..\Main;..\Renderer;..\Shader;..\Common;..\SwiftAsm;..\libjpeg;..\SwiftShader;..\D3D9;..\Reactor;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NO_SANITIZE_FUNCTION=;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ExceptionHandling>false</ExceptionHandling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
@@ -295,7 +295,7 @@
<OmitFramePointers>false</OmitFramePointers>
<WholeProgramOptimization>false</WholeProgramOptimization>
<AdditionalIncludeDirectories>..\;..\Main;..\Renderer;..\Shader;..\Common;..\SwiftAsm;..\libjpeg;..\SwiftShader;..\D3D9;..\Reactor;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WIN32;NO_SANITIZE_FUNCTION=;NDEBUG;_LIB;_CRT_SECURE_NO_DEPRECATE;_SECURE_SCL=0;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ExceptionHandling>false</ExceptionHandling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
diff --git a/src/swiftshader.gni b/src/swiftshader.gni
new file mode 100644
index 0000000..253d405
--- /dev/null
+++ b/src/swiftshader.gni
@@ -0,0 +1,57 @@
+# Copyright (c) 2017 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This file contains configs that need to be added or removed to all
+# SwiftShader libraries
+
+configs_to_add = []  # Configs that the templates in this file add to every SwiftShader target
+configs_to_delete = []  # Configs that the templates in this file remove from every SwiftShader target
+
+if (is_win) {  # Windows only: drop the unicode config
+ configs_to_delete += [ "//build/config/win:unicode" ]
+}
+
+if (is_debug) {
+ # Always build the release version of SwiftShader, even in debug builds, for performance reasons.
+ configs_to_delete += [
+ "//build/config:debug",
+ "//build/config/compiler:default_optimization",
+ ]
+ configs_to_add += [
+ "//build/config:release",
+ "//build/config/compiler:optimize",
+ ]
+ if (is_win) {  # Swap the default CRT config for the release CRT config as well
+ configs_to_delete += [ "//build/config/win:default_crt" ]
+ configs_to_add += [ "//build/config/win:release_crt" ]
+ }
+}
+
+configs_to_delete += [ "//build/config/compiler:chromium_code" ]  # Replaced by no_chromium_code below
+configs_to_add += [
+ "//build/config/compiler:no_chromium_code",
+ "//third_party/swiftshader:swiftshader_config",
+]
+
+template("swiftshader_source_set") {  # source_set wrapper that applies the SwiftShader config removals/additions defined above
+ source_set(target_name) {
+ configs -= configs_to_delete  # Remove first, then add, so the replacement configs end up in the list
+ configs += configs_to_add
+ forward_variables_from(invoker, "*", [ "configs" ])  # Forward every caller variable except configs, which is merged below
+ if (defined(invoker.configs)) {
+ configs += invoker.configs  # Caller-supplied configs are appended to the adjusted list instead of replacing it
+ }
+ }
+}
+
+template("swiftshader_shared_library") {  # shared_library wrapper that applies the SwiftShader config removals/additions defined above
+ shared_library(target_name) {
+ configs -= configs_to_delete  # Remove first, then add, so the replacement configs end up in the list
+ configs += configs_to_add
+ forward_variables_from(invoker, "*", [ "configs" ])  # Forward every caller variable except configs, which is merged below
+ if (defined(invoker.configs)) {
+ configs += invoker.configs  # Caller-supplied configs are appended to the adjusted list instead of replacing it
+ }
+ }
+}
diff --git a/third_party/LLVM/BUILD.gn b/third_party/LLVM/BUILD.gn
index db8d05d..3fc4d22 100644
--- a/third_party/LLVM/BUILD.gn
+++ b/third_party/LLVM/BUILD.gn
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import("../../src/swiftshader.gni")
+
# Need a separate config to ensure the warnings are added to the end.
config("swiftshader_llvm_private_config") {
cflags = []
@@ -53,8 +55,6 @@
"-msse2",
"-Wno-header-hygiene",
"-Wno-null-dereference",
- "-Wno-unused-private-field",
- "-Wno-unused-local-typedef",
]
} else {
cflags += [ "-Wno-unused-but-set-variable" ]
@@ -64,6 +64,8 @@
"-Wno-deprecated-declarations",
"-Wno-enum-compare",
"-Wno-unused-function",
+ "-Wno-unused-local-typedef",
+ "-Wno-unused-private-field",
"-Wno-unused-result",
"-Wno-unused-variable",
]
@@ -73,7 +75,7 @@
]
}
-source_set("swiftshader_llvm") {
+swiftshader_source_set("swiftshader_llvm") {
sources = [
"lib/Analysis/AliasAnalysis.cpp",
"lib/Analysis/AliasSetTracker.cpp",
@@ -449,10 +451,7 @@
"lib/VMCore/Verifier.cpp",
]
- if (is_win) {
- configs -= [ "//build/config/win:unicode" ]
- }
- configs += [ ":swiftshader_llvm_private_config" ]
+ configs = [ ":swiftshader_llvm_private_config" ]
include_dirs = [ "lib/Target/X86" ]
diff --git a/third_party/llvm-subzero/lib/Support/Atomic.cpp b/third_party/llvm-subzero/lib/Support/Atomic.cpp
index 80550e2..7328a93 100644
--- a/third_party/llvm-subzero/lib/Support/Atomic.cpp
+++ b/third_party/llvm-subzero/lib/Support/Atomic.cpp
@@ -17,7 +17,9 @@
using namespace llvm;
#if defined(_MSC_VER)
-#include <Intrin.h>
+#include <intrin.h>
+
+// We must include windows.h after intrin.h.
#include <windows.h>
#undef MemoryFence
#endif