Revert "Subzero: add support for large stacks on Windows"
This reverts commit f50cc82b3bed049f50ce8e530e3be5db307baa64.
Reason for revert: the Skia roll is broken. See:
https://chromium-review.googlesource.com/c/chromium/src/+/2067664
"lld-link: error: undefined symbol: _chkstk"
Change-Id: I2ac508c9560e3a49e8a856906c3a5f6141c45207
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/41489
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index e4137b3..f95ba71 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -18,7 +18,6 @@
#include "gtest/gtest.h"
-#include <array>
#include <cmath>
#include <thread>
#include <tuple>
@@ -1502,51 +1501,6 @@
}
}
-// This test was written because on Windows with Subzero, we would get a crash when executing a function
-// with a large number of local variables. The problem was that on Windows, 4K pages are allocated as
-// needed for the stack whenever an access is made in a "guard page", at which point the page is committed,
-// and the next 4K page becomes the guard page. If a stack access is made that's beyond the guard page,
-// a regular page fault occurs. To fix this, Subzero (and any compiler) now emits a call to __chkstk with
-// the stack size in EAX, so that it can probe the stack in 4K increments up to that size, committing the
-// required pages. See https://docs.microsoft.com/en-us/windows/win32/devnotes/-win32-chkstk.
-TEST(ReactorUnitTests, LargeStack)
-{
- // An empirically large enough value to access outside the guard pages
- constexpr int ArrayByteSize = 24 * 1024;
- constexpr int ArraySize = ArrayByteSize / sizeof(int32_t);
-
- FunctionT<void(int32_t * v)> function;
- {
- // Allocate a stack array large enough that writing to the first element will reach beyond
- // the guard page.
- Array<Int, ArraySize> largeStackArray;
- for(int i = 0; i < ArraySize; ++i)
- {
- largeStackArray[i] = i;
- }
-
- Pointer<Int> in = function.Arg<0>();
- for(int i = 0; i < ArraySize; ++i)
- {
- in[i] = largeStackArray[i];
- }
- }
-
- auto routine = function("one");
- std::array<int32_t, ArraySize> v;
-
- // Run this in a thread, so that we get the default reserved stack size (8K on Win64).
- std::thread t([&] {
- routine(v.data());
- });
- t.join();
-
- for(int i = 0; i < ArraySize; ++i)
- {
- EXPECT_EQ(v[i], i);
- }
-}
-
TEST(ReactorUnitTests, Call)
{
struct Class
diff --git a/third_party/subzero/src/IceTargetLoweringX8632.cpp b/third_party/subzero/src/IceTargetLoweringX8632.cpp
index 3262279..0a7a56e 100644
--- a/third_party/subzero/src/IceTargetLoweringX8632.cpp
+++ b/third_party/subzero/src/IceTargetLoweringX8632.cpp
@@ -17,10 +17,6 @@
#include "IceTargetLoweringX8632Traits.h"
-#if defined(SUBZERO_USE_MICROSOFT_ABI)
-extern "C" void _chkstk();
-#endif
-
namespace X8632 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
return ::Ice::X8632::TargetX8632::create(Func);
@@ -406,32 +402,6 @@
lowerIndirectJump(T_ecx);
}
-void TargetX8632::emitStackProbe(size_t StackSizeBytes) {
-#if defined(SUBZERO_USE_MICROSOFT_ABI)
- if (StackSizeBytes >= 4096) {
- // _chkstk on Win32 is actually __alloca_probe, which adjusts ESP by the
- // stack amount specified in EAX, so we save ESP in ECX, and restore them
- // both after the call.
-
- Variable *EAX = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- Variable *ESP = makeReg(IceType_i32, Traits::RegisterSet::Reg_esp);
- Variable *ECX = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
-
- _push_reg(ECX->getRegNum());
- _mov(ECX, ESP);
-
- _mov(EAX, Ctx->getConstantInt32(StackSizeBytes));
-
- auto *CallTarget =
- Ctx->getConstantInt32(reinterpret_cast<int32_t>(&_chkstk));
- emitCallToTarget(CallTarget, nullptr);
-
- _mov(ESP, ECX);
- _pop_reg(ECX->getRegNum());
- }
-#endif
-}
-
// In some cases, there are x-macros tables for both high-level and low-level
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
diff --git a/third_party/subzero/src/IceTargetLoweringX8632.h b/third_party/subzero/src/IceTargetLoweringX8632.h
index 349fb92..2715b0f 100644
--- a/third_party/subzero/src/IceTargetLoweringX8632.h
+++ b/third_party/subzero/src/IceTargetLoweringX8632.h
@@ -59,7 +59,6 @@
void initSandbox();
bool legalizeOptAddrForSandbox(OptAddr *Addr);
void emitSandboxedReturn();
- void emitStackProbe(size_t StackSizeBytes);
void lowerIndirectJump(Variable *JumpTarget);
void emitGetIP(CfgNode *Node);
Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) override;
diff --git a/third_party/subzero/src/IceTargetLoweringX8664.cpp b/third_party/subzero/src/IceTargetLoweringX8664.cpp
index 5ec9e34..9cfab50 100644
--- a/third_party/subzero/src/IceTargetLoweringX8664.cpp
+++ b/third_party/subzero/src/IceTargetLoweringX8664.cpp
@@ -17,10 +17,6 @@
#include "IceDefs.h"
#include "IceTargetLoweringX8664Traits.h"
-#if defined(SUBZERO_USE_MICROSOFT_ABI)
-extern "C" void __chkstk();
-#endif
-
namespace X8664 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
return ::Ice::X8664::TargetX8664::create(Func);
@@ -762,26 +758,6 @@
}
}
-void TargetX8664::emitStackProbe(size_t StackSizeBytes) {
-#if defined(SUBZERO_USE_MICROSOFT_ABI)
- // Mirroring the behavior of MSVC here, which emits a _chkstk when locals are
- // >= 4KB, rather than the 8KB claimed by the docs.
- if (StackSizeBytes >= 4096) {
- // __chkstk on Win64 probes the stack up to RSP - EAX, but does not clobber
- // RSP, so we don't need to save and restore it.
-
- Variable *EAX = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- _mov(EAX, Ctx->getConstantInt32(StackSizeBytes));
-
- auto *CallTarget =
- Ctx->getConstantInt64(reinterpret_cast<int64_t>(&__chkstk));
- Operand *CallTargetReg =
- legalizeToReg(CallTarget, Traits::RegisterSet::Reg_r11);
- emitCallToTarget(CallTargetReg, nullptr);
- }
-#endif
-}
-
// In some cases, there are x-macros tables for both high-level and low-level
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
diff --git a/third_party/subzero/src/IceTargetLoweringX8664.h b/third_party/subzero/src/IceTargetLoweringX8664.h
index 3f33050..ec24df6 100644
--- a/third_party/subzero/src/IceTargetLoweringX8664.h
+++ b/third_party/subzero/src/IceTargetLoweringX8664.h
@@ -62,7 +62,6 @@
void initSandbox();
bool legalizeOptAddrForSandbox(OptAddr *Addr);
void emitSandboxedReturn();
- void emitStackProbe(size_t StackSizeBytes);
void lowerIndirectJump(Variable *JumpTarget);
void emitGetIP(CfgNode *Node);
Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) override;
diff --git a/third_party/subzero/src/IceTargetLoweringX86Base.h b/third_party/subzero/src/IceTargetLoweringX86Base.h
index 46df7be..9d60609 100644
--- a/third_party/subzero/src/IceTargetLoweringX86Base.h
+++ b/third_party/subzero/src/IceTargetLoweringX86Base.h
@@ -376,12 +376,6 @@
void emitSandboxedReturn() {
dispatchToConcrete(&Traits::ConcreteTarget::emitSandboxedReturn);
}
-
- void emitStackProbe(size_t StackSizeBytes) {
- dispatchToConcrete(&Traits::ConcreteTarget::emitStackProbe,
- std::move(StackSizeBytes));
- }
-
/// Emit just the call instruction (without argument or return variable
/// processing), sandboxing if needed.
virtual Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) = 0;
diff --git a/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h b/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h
index 5b19e7c..becbbed 100644
--- a/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h
+++ b/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h
@@ -1199,8 +1199,6 @@
SpillAreaSizeBytes = StackSize - StackOffset; // Adjust for alignment, if any
if (SpillAreaSizeBytes) {
- emitStackProbe(SpillAreaSizeBytes);
-
// Generate "sub stackptr, SpillAreaSizeBytes"
_sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
}