blob: 106ac354c7683a6864137a5400480f8140c2f71d [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Ben Claytonac07ed82019-03-26 14:17:41 +000017#include "LLVMReactor.hpp"
18#include "LLVMReactorDebugInfo.hpp"
John Bauman89401822014-05-06 15:04:28 -040019
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040020#include "x86.hpp"
21#include "CPUID.hpp"
22#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040023#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040024#include "MutexLock.hpp"
25
26#undef min
27#undef max
28
Ben Clayton09a7f452019-04-25 15:22:43 +010029#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which will be
// treated as an error in SwiftShader targets.
32#pragma clang diagnostic push
33#pragma clang diagnostic ignored "-Wextra-semi"
34#endif // defined(__clang__)
35
Ben Clayton5875be52019-04-11 14:57:40 -040036#include "llvm/Analysis/LoopPass.h"
37#include "llvm/ExecutionEngine/ExecutionEngine.h"
38#include "llvm/ExecutionEngine/JITSymbol.h"
39#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
40#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
41#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
42#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
43#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
44#include "llvm/ExecutionEngine/SectionMemoryManager.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DataLayout.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalVariable.h"
Ben Clayton5875be52019-04-11 14:57:40 -040049#include "llvm/IR/Intrinsics.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010050#include "llvm/IR/IRBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040051#include "llvm/IR/LegacyPassManager.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010052#include "llvm/IR/LLVMContext.h"
Ben Clayton5875be52019-04-11 14:57:40 -040053#include "llvm/IR/Mangler.h"
54#include "llvm/IR/Module.h"
Ben Clayton4b944652019-05-02 10:56:19 +010055#include "llvm/IR/Verifier.h"
Ben Clayton5875be52019-04-11 14:57:40 -040056#include "llvm/Support/Error.h"
57#include "llvm/Support/TargetSelect.h"
58#include "llvm/Target/TargetOptions.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010059#include "llvm/Transforms/Coroutines.h"
Ben Clayton5875be52019-04-11 14:57:40 -040060#include "llvm/Transforms/InstCombine/InstCombine.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010061#include "llvm/Transforms/IPO.h"
62#include "llvm/Transforms/IPO/PassManagerBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040063#include "llvm/Transforms/Scalar.h"
64#include "llvm/Transforms/Scalar/GVN.h"
Ben Clayton20507fa2019-04-20 01:40:15 -040065
Ben Clayton09a7f452019-04-25 15:22:43 +010066#if defined(__clang__)
67#pragma clang diagnostic pop
68#endif // defined(__clang__)
69
Ben Clayton5875be52019-04-11 14:57:40 -040070#define ARGS(...) {__VA_ARGS__}
71#define CreateCall2 CreateCall
72#define CreateCall3 CreateCall
Logan Chien0eedc8c2018-08-21 09:34:28 +080073
Ben Clayton5875be52019-04-11 14:57:40 -040074#include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080075
John Bauman89401822014-05-06 15:04:28 -040076#include <fstream>
Ben Claytoncee3dff2019-05-22 12:01:22 +010077#include <iostream>
78#include <mutex>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000079#include <numeric>
80#include <thread>
John Bauman89401822014-05-06 15:04:28 -040081
Nicolas Capens47dc8672017-04-25 12:54:39 -040082#if defined(__i386__) || defined(__x86_64__)
83#include <xmmintrin.h>
84#endif
85
Logan Chien40a60052018-09-26 19:03:53 +080086#include <math.h>
87
Nicolas Capenscb122582014-05-06 23:34:44 -040088#if defined(__x86_64__) && defined(_WIN32)
Ben Clayton2f58df32019-06-23 21:29:25 +010089 extern "C" void X86CompilationCallback()
90 {
91 UNIMPLEMENTED("X86CompilationCallback");
92 }
93#endif
94
95#if defined(_WIN64)
96 extern "C" void __chkstk();
97#elif defined(_WIN32)
98 extern "C" void _chkstk();
John Bauman66b8ab22014-05-06 15:57:45 -040099#endif
100
namespace rr
{
	// Forward declaration only; the definition is not in this part of the
	// file. Used below to look up externally provided symbols by name, and
	// treated as "not found" when it returns nullptr.
	void *resolveExternalSymbol(const char *name);
}
105
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400106namespace
107{
Ben Clayton55bc37a2019-07-04 12:17:12 +0100108 // Default configuration settings. Must be accessed under mutex lock.
109 std::mutex defaultConfigLock;
110 rr::Config &defaultConfig()
111 {
112 // This uses a static in a function to avoid the cost of a global static
113 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
114 static rr::Config config = rr::Config::Edit()
115 .set(rr::Optimization::Level::Default)
116 .add(rr::Optimization::Pass::ScalarReplAggregates)
117 .add(rr::Optimization::Pass::InstructionCombining)
118 .apply({});
119 return config;
120 }
121
// Cache provides a simple, thread-safe key-value store.
// Every access to the underlying map is performed while holding the cache's
// own mutex.
template <typename KEY, typename VALUE>
class Cache
{
public:
	Cache() = default;

	// Copies the entries of other while holding other's mutex.
	Cache(const Cache& other)
	{
		std::lock_guard<std::mutex> guard(other.mutex);
		map = other.map;
	}

	// Returns the value stored for key. If there is none, create() is called
	// (with the cache's mutex held), and its result is stored and returned.
	VALUE getOrCreate(KEY key, std::function<VALUE()> create)
	{
		std::lock_guard<std::mutex> guard(mutex);
		auto found = map.find(key);
		if (found == map.end())
		{
			auto created = create();
			map.emplace(key, created);
			return created;
		}
		return found->second;
	}

private:
	mutable std::mutex mutex; // mutable required for copy constructor.
	std::unordered_map<KEY, VALUE> map;
};
155
Ben Clayton6f8e5652019-06-29 01:58:02 +0100156 // JITGlobals is a singleton that holds all the immutable machine specific
157 // information for the host device.
Ben Clayton52ce1e92019-07-15 11:41:00 +0100158 class JITGlobals
Ben Clayton6f8e5652019-06-29 01:58:02 +0100159 {
160 public:
Ben Clayton52ce1e92019-07-15 11:41:00 +0100161 using TargetMachineSPtr = std::shared_ptr<llvm::TargetMachine>;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100162
Ben Clayton52ce1e92019-07-15 11:41:00 +0100163 static JITGlobals * get();
164
165 const std::string mcpu;
166 const std::vector<std::string> mattrs;
167 const char* const march;
168 const llvm::TargetOptions targetOptions;
169 const llvm::DataLayout dataLayout;
170
171 TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100172
173 private:
Ben Clayton52ce1e92019-07-15 11:41:00 +0100174 static JITGlobals create();
175 static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);
176 JITGlobals(const char *mcpu,
177 const std::vector<std::string> &mattrs,
178 const char *march,
179 const llvm::TargetOptions &targetOptions,
180 const llvm::DataLayout &dataLayout);
181 JITGlobals(const JITGlobals&) = default;
182
183 // The cache key here is actually a rr::Optimization::Level. We use int
184 // as 'enum class' types do not provide builtin hash functions until
185 // C++14. See: https://stackoverflow.com/a/29618545.
186 Cache<int, TargetMachineSPtr> targetMachines;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100187 };
188
Ben Clayton52ce1e92019-07-15 11:41:00 +0100189 JITGlobals * JITGlobals::get()
Ben Clayton6f8e5652019-06-29 01:58:02 +0100190 {
Ben Clayton52ce1e92019-07-15 11:41:00 +0100191 static JITGlobals instance = create();
Ben Clayton6f8e5652019-06-29 01:58:02 +0100192 return &instance;
193 }
194
Ben Clayton52ce1e92019-07-15 11:41:00 +0100195 JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100196 {
Ben Clayton52ce1e92019-07-15 11:41:00 +0100197 return targetMachines.getOrCreate(static_cast<int>(optlevel), [&]() {
198 return TargetMachineSPtr(llvm::EngineBuilder()
199#ifdef ENABLE_RR_DEBUG_INFO
Ben Claytone031f362019-07-20 12:35:40 +0100200 .setOptLevel(toLLVM(rr::Optimization::Level::None))
Ben Clayton52ce1e92019-07-15 11:41:00 +0100201#else
202 .setOptLevel(toLLVM(optlevel))
203#endif // ENABLE_RR_DEBUG_INFO
204 .setMCPU(mcpu)
205 .setMArch(march)
206 .setMAttrs(mattrs)
207 .setTargetOptions(targetOptions)
208 .selectTarget());
209 });
210 }
Ben Clayton49f80512019-07-04 17:30:54 +0100211
Ben Clayton52ce1e92019-07-15 11:41:00 +0100212 JITGlobals JITGlobals::create()
213 {
214 struct LLVMInitializer
215 {
216 LLVMInitializer()
217 {
218 llvm::InitializeNativeTarget();
219 llvm::InitializeNativeTargetAsmPrinter();
220 llvm::InitializeNativeTargetAsmParser();
221 }
222 };
223 static LLVMInitializer initializeLLVM;
224
225 auto mcpu = llvm::sys::getHostCPUName();
226
Ben Clayton6f8e5652019-06-29 01:58:02 +0100227 llvm::StringMap<bool> features;
228 bool ok = llvm::sys::getHostCPUFeatures(features);
229
230#if defined(__i386__) || defined(__x86_64__) || \
231(defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
232 ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
233#else
234 (void) ok; // getHostCPUFeatures always returns false on other platforms
Ben Claytonac07ed82019-03-26 14:17:41 +0000235#endif
236
Ben Clayton52ce1e92019-07-15 11:41:00 +0100237 std::vector<std::string> mattrs;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100238 for (auto &feature : features)
239 {
240 if (feature.second) { mattrs.push_back(feature.first()); }
241 }
242
Ben Clayton52ce1e92019-07-15 11:41:00 +0100243 const char* march = nullptr;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100244#if defined(__x86_64__)
Ben Clayton49f80512019-07-04 17:30:54 +0100245 march = "x86-64";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100246#elif defined(__i386__)
Ben Clayton49f80512019-07-04 17:30:54 +0100247 march = "x86";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100248#elif defined(__aarch64__)
Ben Clayton49f80512019-07-04 17:30:54 +0100249 march = "arm64";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100250#elif defined(__arm__)
Ben Clayton49f80512019-07-04 17:30:54 +0100251 march = "arm";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100252#elif defined(__mips__)
253#if defined(__mips64)
Ben Clayton49f80512019-07-04 17:30:54 +0100254 march = "mips64el";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100255#else
Ben Clayton49f80512019-07-04 17:30:54 +0100256 march = "mipsel";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100257#endif
258#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
Ben Clayton49f80512019-07-04 17:30:54 +0100259 march = "ppc64le";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100260#else
261 #error "unknown architecture"
262#endif
263
Ben Clayton52ce1e92019-07-15 11:41:00 +0100264 llvm::TargetOptions targetOptions;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100265 targetOptions.UnsafeFPMath = false;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100266
267 auto targetMachine = std::unique_ptr<llvm::TargetMachine>(
268 llvm::EngineBuilder()
269 .setOptLevel(llvm::CodeGenOpt::None)
Ben Clayton49f80512019-07-04 17:30:54 +0100270 .setMCPU(mcpu)
271 .setMArch(march)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100272 .setMAttrs(mattrs)
273 .setTargetOptions(targetOptions)
274 .selectTarget());
275
Ben Clayton52ce1e92019-07-15 11:41:00 +0100276 auto dataLayout = targetMachine->createDataLayout();
277
278 return JITGlobals(mcpu.data(), mattrs, march, targetOptions, dataLayout);
279 }
280
281 llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
282 {
283 switch (level)
284 {
285 case rr::Optimization::Level::None: return ::llvm::CodeGenOpt::None;
286 case rr::Optimization::Level::Less: return ::llvm::CodeGenOpt::Less;
287 case rr::Optimization::Level::Default: return ::llvm::CodeGenOpt::Default;
288 case rr::Optimization::Level::Aggressive: return ::llvm::CodeGenOpt::Aggressive;
289 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
290 }
291 return ::llvm::CodeGenOpt::Default;
292 }
293
294 JITGlobals::JITGlobals(const char* mcpu,
295 const std::vector<std::string> &mattrs,
296 const char* march,
297 const llvm::TargetOptions &targetOptions,
298 const llvm::DataLayout &dataLayout) :
299 mcpu(mcpu),
300 mattrs(mattrs),
301 march(march),
302 targetOptions(targetOptions),
303 dataLayout(dataLayout)
304 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100305 }
306
307 // JITRoutine is a rr::Routine that holds a LLVM JIT session, compiler and
308 // object layer as each routine may require different target machine
309 // settings and no Reactor routine directly links against another.
310 class JITRoutine : public rr::Routine
311 {
312 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
313 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
314 public:
Ben Clayton68cfc782019-06-29 12:31:08 +0100315 JITRoutine(
316 std::unique_ptr<llvm::Module> module,
317 llvm::Function **funcs,
318 size_t count,
Ben Clayton55bc37a2019-07-04 12:17:12 +0100319 const rr::Config &config) :
Ben Clayton6f8e5652019-06-29 01:58:02 +0100320 resolver(createLegacyLookupResolver(
321 session,
322 [&](const std::string &name) {
323 void *func = rr::resolveExternalSymbol(name.c_str());
324 if (func != nullptr)
325 {
326 return llvm::JITSymbol(
327 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
328 }
329 return objLayer.findSymbol(name, true);
330 },
331 [](llvm::Error err) {
332 if (err)
333 {
334 // TODO: Log the symbol resolution errors.
335 return;
336 }
337 })),
Ben Clayton52ce1e92019-07-15 11:41:00 +0100338 targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel())),
Ben Clayton6f8e5652019-06-29 01:58:02 +0100339 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
340 objLayer(
341 session,
342 [this](llvm::orc::VModuleKey) {
343 return ObjLayer::Resources{std::make_shared<llvm::SectionMemoryManager>(), resolver};
344 },
345 ObjLayer::NotifyLoadedFtor(),
346 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
347#ifdef ENABLE_RR_DEBUG_INFO
348 rr::DebugInfo::NotifyObjectEmitted(Obj, L);
349#endif // ENABLE_RR_DEBUG_INFO
350 },
351 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
352#ifdef ENABLE_RR_DEBUG_INFO
353 rr::DebugInfo::NotifyFreeingObject(Obj);
354#endif // ENABLE_RR_DEBUG_INFO
355 }
356 ),
357 addresses(count)
358 {
359 std::vector<std::string> mangledNames(count);
360 for (size_t i = 0; i < count; i++)
361 {
362 auto func = funcs[i];
363 static size_t numEmittedFunctions = 0;
364 std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
365 func->setName(name);
366 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
367 func->setDoesNotThrow();
368
369 llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
370 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, JITGlobals::get()->dataLayout);
371 }
372
373 auto moduleKey = session.allocateVModule();
374
375 // Once the module is passed to the compileLayer, the
376 // llvm::Functions are freed. Make sure funcs are not referenced
377 // after this point.
378 funcs = nullptr;
379
380 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(module)));
381
382 // Resolve the function addresses.
383 for (size_t i = 0; i < count; i++)
384 {
385 auto symbol = compileLayer.findSymbolIn(moduleKey, mangledNames[i], false);
386 if(auto address = symbol.getAddress())
387 {
388 addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(address.get()));
389 }
390 }
391 }
392
393 const void *getEntry(int index) override
394 {
395 return addresses[index];
396 }
397
398 private:
399 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
Ben Clayton52ce1e92019-07-15 11:41:00 +0100400 std::shared_ptr<llvm::TargetMachine> targetMachine;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100401 llvm::orc::ExecutionSession session;
402 CompileLayer compileLayer;
403 ObjLayer objLayer;
404 std::vector<const void *> addresses;
405 };
406
407 // JITBuilder holds all the LLVM state for building routines.
408 class JITBuilder
409 {
410 public:
Ben Clayton55bc37a2019-07-04 12:17:12 +0100411 JITBuilder(const rr::Config &config) :
412 config(config),
Ben Clayton6f8e5652019-06-29 01:58:02 +0100413 module(new llvm::Module("", context)),
414 builder(new llvm::IRBuilder<>(context))
415 {
416 module->setDataLayout(JITGlobals::get()->dataLayout);
417 }
418
Ben Clayton55bc37a2019-07-04 12:17:12 +0100419 void optimize(const rr::Config &cfg)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100420 {
Ben Clayton55bc37a2019-07-04 12:17:12 +0100421
Ben Clayton6f8e5652019-06-29 01:58:02 +0100422#ifdef ENABLE_RR_DEBUG_INFO
423 if (debugInfo != nullptr)
424 {
425 return; // Don't optimize if we're generating debug info.
426 }
427#endif // ENABLE_RR_DEBUG_INFO
428
429 std::unique_ptr<llvm::legacy::PassManager> passManager(
430 new llvm::legacy::PassManager());
431
Ben Clayton55bc37a2019-07-04 12:17:12 +0100432 for(auto pass : cfg.getOptimization().getPasses())
Ben Clayton6f8e5652019-06-29 01:58:02 +0100433 {
Ben Clayton55bc37a2019-07-04 12:17:12 +0100434 switch(pass)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100435 {
Ben Clayton55bc37a2019-07-04 12:17:12 +0100436 case rr::Optimization::Pass::Disabled: break;
437 case rr::Optimization::Pass::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
438 case rr::Optimization::Pass::LICM: passManager->add(llvm::createLICMPass()); break;
439 case rr::Optimization::Pass::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
440 case rr::Optimization::Pass::GVN: passManager->add(llvm::createGVNPass()); break;
441 case rr::Optimization::Pass::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
442 case rr::Optimization::Pass::Reassociate: passManager->add(llvm::createReassociatePass()); break;
443 case rr::Optimization::Pass::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
444 case rr::Optimization::Pass::SCCP: passManager->add(llvm::createSCCPPass()); break;
445 case rr::Optimization::Pass::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
Ben Clayton28ae0a42019-07-10 00:50:23 +0100446 case rr::Optimization::Pass::EarlyCSEPass: passManager->add(llvm::createEarlyCSEPass()); break;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100447 default:
Ben Clayton55bc37a2019-07-04 12:17:12 +0100448 UNREACHABLE("pass: %d", int(pass));
Ben Clayton6f8e5652019-06-29 01:58:02 +0100449 }
450 }
451
452 passManager->run(*module);
453 }
454
Ben Clayton6897e9b2019-07-16 17:27:27 +0100455 std::shared_ptr<rr::Routine> acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100456 {
457 ASSERT(module);
Ben Clayton6897e9b2019-07-16 17:27:27 +0100458 return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100459 }
460
Ben Clayton55bc37a2019-07-04 12:17:12 +0100461 const rr::Config config;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100462 llvm::LLVMContext context;
463 std::unique_ptr<llvm::Module> module;
464 std::unique_ptr<llvm::IRBuilder<>> builder;
465 llvm::Function *function = nullptr;
466
467 struct CoroutineState
468 {
469 llvm::Function *await = nullptr;
470 llvm::Function *destroy = nullptr;
471 llvm::Value *handle = nullptr;
472 llvm::Value *id = nullptr;
473 llvm::Value *promise = nullptr;
Ben Clayton16da2812019-07-09 23:28:51 +0100474 llvm::Type *yieldType = nullptr;
475 llvm::BasicBlock *entryBlock = nullptr;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100476 llvm::BasicBlock *suspendBlock = nullptr;
477 llvm::BasicBlock *endBlock = nullptr;
478 llvm::BasicBlock *destroyBlock = nullptr;
479 };
480 CoroutineState coroutine;
481
482#ifdef ENABLE_RR_DEBUG_INFO
483 std::unique_ptr<rr::DebugInfo> debugInfo;
484#endif
485 };
486
487 std::unique_ptr<JITBuilder> jit;
488 std::mutex codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800489
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000490#ifdef ENABLE_RR_PRINT
// Returns a copy of str in which every non-overlapping occurrence of substr
// has been replaced with replacement. Text inserted by a replacement is not
// rescanned, so a replacement containing substr does not recurse.
// An empty substr leaves the string unchanged: it would otherwise match at
// every position and the loop would never terminate.
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
	if(substr.empty())
	{
		return str;
	}

	size_t pos = 0;
	while((pos = str.find(substr, pos)) != std::string::npos) {
		str.replace(pos, substr.length(), replacement);
		pos += replacement.length(); // Skip over the text just inserted.
	}
	return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000500#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000501
// Rounds val up to the nearest multiple of alignment.
// alignment must be non-zero, as it is used as a divisor.
template <typename T>
T alignUp(T val, T alignment)
{
	const T blocks = (val + alignment - 1) / alignment;
	return alignment * blocks;
}
507
508 void* alignedAlloc(size_t size, size_t alignment)
509 {
510 ASSERT(alignment < 256);
511 auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
512 auto aligned = allocation;
513 aligned += sizeof(uint8_t); // Make space for the base-address offset.
514 aligned = reinterpret_cast<uint8_t*>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment)); // align
515 auto offset = static_cast<uint8_t>(aligned - allocation);
516 aligned[-1] = offset;
517 return aligned;
518 }
519
// Releases memory obtained from alignedAlloc(). The byte immediately before
// ptr holds the distance back to the start of the underlying allocation.
void alignedFree(void* ptr)
{
	uint8_t *const aligned = static_cast<uint8_t*>(ptr);
	const uint8_t offset = *(aligned - 1);
	delete[] (aligned - offset);
}
527
Logan Chien0eedc8c2018-08-21 09:34:28 +0800528 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
529 {
530 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
531
532 llvm::VectorType *extTy =
533 llvm::VectorType::getExtendedElementVectorType(ty);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100534 x = jit->builder->CreateZExt(x, extTy);
535 y = jit->builder->CreateZExt(y, extTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800536
537 // (x + y + 1) >> 1
538 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100539 llvm::Value *res = jit->builder->CreateAdd(x, y);
540 res = jit->builder->CreateAdd(res, one);
541 res = jit->builder->CreateLShr(res, one);
542 return jit->builder->CreateTrunc(res, ty);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800543 }
544
545 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800546 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800547 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100548 return jit->builder->CreateSelect(jit->builder->CreateICmp(pred, x, y), x, y);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800549 }
550
551 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800552 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800553 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100554 return jit->builder->CreateSExt(jit->builder->CreateICmp(pred, x, y), dstTy, "");
Logan Chien0eedc8c2018-08-21 09:34:28 +0800555 }
556
Logan Chiene3191012018-08-24 22:01:50 +0800557#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800558 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
559 {
560 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
561 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
562
563 llvm::Value *undef = llvm::UndefValue::get(srcTy);
564 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
565 std::iota(mask.begin(), mask.end(), 0);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100566 llvm::Value *v = jit->builder->CreateShuffleVector(op, undef, mask);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800567
Ben Clayton6f8e5652019-06-29 01:58:02 +0100568 return sext ? jit->builder->CreateSExt(v, dstTy)
569 : jit->builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800570 }
571
572 llvm::Value *lowerPABS(llvm::Value *v)
573 {
574 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
Ben Clayton6f8e5652019-06-29 01:58:02 +0100575 llvm::Value *cmp = jit->builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
576 llvm::Value *neg = jit->builder->CreateNeg(v);
577 return jit->builder->CreateSelect(cmp, v, neg);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800578 }
579#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800580
581#if !defined(__i386__) && !defined(__x86_64__)
582 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800583 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800584 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100585 return jit->builder->CreateSelect(jit->builder->CreateFCmp(pred, x, y), x, y);
Logan Chiene3191012018-08-24 22:01:50 +0800586 }
587
Logan Chien83fc07a2018-09-26 22:14:00 +0800588 llvm::Value *lowerRound(llvm::Value *x)
589 {
590 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
Ben Clayton6f8e5652019-06-29 01:58:02 +0100591 jit->module.get(), llvm::Intrinsic::nearbyint, {x->getType()});
592 return jit->builder->CreateCall(nearbyint, ARGS(x));
Logan Chien83fc07a2018-09-26 22:14:00 +0800593 }
594
Logan Chien2faa24a2018-09-26 19:59:32 +0800595 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
596 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100597 return jit->builder->CreateFPToSI(lowerRound(x), ty);
Logan Chien2faa24a2018-09-26 19:59:32 +0800598 }
599
Logan Chien40a60052018-09-26 19:03:53 +0800600 llvm::Value *lowerFloor(llvm::Value *x)
601 {
602 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
Ben Clayton6f8e5652019-06-29 01:58:02 +0100603 jit->module.get(), llvm::Intrinsic::floor, {x->getType()});
604 return jit->builder->CreateCall(floor, ARGS(x));
Logan Chien40a60052018-09-26 19:03:53 +0800605 }
606
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800607 llvm::Value *lowerTrunc(llvm::Value *x)
608 {
609 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
Ben Clayton6f8e5652019-06-29 01:58:02 +0100610 jit->module.get(), llvm::Intrinsic::trunc, {x->getType()});
611 return jit->builder->CreateCall(trunc, ARGS(x));
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800612 }
613
Logan Chiene3191012018-08-24 22:01:50 +0800614 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800615 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800616 {
Logan Chien28794cf2018-09-26 18:58:03 +0800617 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
618 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
619
620 unsigned numBits = ty->getScalarSizeInBits();
621
622 llvm::Value *max, *min, *extX, *extY;
623 if (isSigned)
624 {
625 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
626 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100627 extX = jit->builder->CreateSExt(x, extTy);
628 extY = jit->builder->CreateSExt(y, extTy);
Logan Chien28794cf2018-09-26 18:58:03 +0800629 }
630 else
631 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400632 ASSERT_MSG(numBits <= 64, "numBits: %d", int(numBits));
Logan Chien28794cf2018-09-26 18:58:03 +0800633 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
634 max = llvm::ConstantInt::get(extTy, maxVal, false);
635 min = llvm::ConstantInt::get(extTy, 0, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100636 extX = jit->builder->CreateZExt(x, extTy);
637 extY = jit->builder->CreateZExt(y, extTy);
Logan Chien28794cf2018-09-26 18:58:03 +0800638 }
639
Ben Clayton6f8e5652019-06-29 01:58:02 +0100640 llvm::Value *res = isAdd ? jit->builder->CreateAdd(extX, extY)
641 : jit->builder->CreateSub(extX, extY);
Logan Chien28794cf2018-09-26 18:58:03 +0800642
643 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
644 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
645
Ben Clayton6f8e5652019-06-29 01:58:02 +0100646 return jit->builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800647 }
648
649 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
650 {
Logan Chien28794cf2018-09-26 18:58:03 +0800651 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800652 }
653
654 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
655 {
Logan Chien28794cf2018-09-26 18:58:03 +0800656 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800657 }
658
659 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
660 {
Logan Chien28794cf2018-09-26 18:58:03 +0800661 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800662 }
663
664 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
665 {
Logan Chien28794cf2018-09-26 18:58:03 +0800666 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800667 }
668
669 llvm::Value *lowerSQRT(llvm::Value *x)
670 {
671 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
Ben Clayton6f8e5652019-06-29 01:58:02 +0100672 jit->module.get(), llvm::Intrinsic::sqrt, {x->getType()});
673 return jit->builder->CreateCall(sqrt, ARGS(x));
Logan Chiene3191012018-08-24 22:01:50 +0800674 }
675
676 llvm::Value *lowerRCP(llvm::Value *x)
677 {
678 llvm::Type *ty = x->getType();
679 llvm::Constant *one;
680 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
681 {
682 one = llvm::ConstantVector::getSplat(
683 vectorTy->getNumElements(),
684 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
685 }
686 else
687 {
688 one = llvm::ConstantFP::get(ty, 1);
689 }
Ben Clayton6f8e5652019-06-29 01:58:02 +0100690 return jit->builder->CreateFDiv(one, x);
Logan Chiene3191012018-08-24 22:01:50 +0800691 }
692
693 llvm::Value *lowerRSQRT(llvm::Value *x)
694 {
695 return lowerRCP(lowerSQRT(x));
696 }
697
698 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
699 {
700 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
701 llvm::Value *y = llvm::ConstantVector::getSplat(
702 ty->getNumElements(),
703 llvm::ConstantInt::get(ty->getElementType(), scalarY));
Ben Clayton6f8e5652019-06-29 01:58:02 +0100704 return jit->builder->CreateShl(x, y);
Logan Chiene3191012018-08-24 22:01:50 +0800705 }
706
707 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
708 {
709 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
710 llvm::Value *y = llvm::ConstantVector::getSplat(
711 ty->getNumElements(),
712 llvm::ConstantInt::get(ty->getElementType(), scalarY));
Ben Clayton6f8e5652019-06-29 01:58:02 +0100713 return jit->builder->CreateAShr(x, y);
Logan Chiene3191012018-08-24 22:01:50 +0800714 }
715
716 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
717 {
718 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
719 llvm::Value *y = llvm::ConstantVector::getSplat(
720 ty->getNumElements(),
721 llvm::ConstantInt::get(ty->getElementType(), scalarY));
Ben Clayton6f8e5652019-06-29 01:58:02 +0100722 return jit->builder->CreateLShr(x, y);
Logan Chiene3191012018-08-24 22:01:50 +0800723 }
724
725 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
726 {
727 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
728 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
729
Ben Clayton6f8e5652019-06-29 01:58:02 +0100730 llvm::Value *extX = jit->builder->CreateSExt(x, extTy);
731 llvm::Value *extY = jit->builder->CreateSExt(y, extTy);
732 llvm::Value *mult = jit->builder->CreateMul(extX, extY);
Logan Chiene3191012018-08-24 22:01:50 +0800733
734 llvm::Value *undef = llvm::UndefValue::get(extTy);
735
736 llvm::SmallVector<uint32_t, 16> evenIdx;
737 llvm::SmallVector<uint32_t, 16> oddIdx;
738 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
739 {
740 evenIdx.push_back(i);
741 oddIdx.push_back(i + 1);
742 }
743
Ben Clayton6f8e5652019-06-29 01:58:02 +0100744 llvm::Value *lhs = jit->builder->CreateShuffleVector(mult, undef, evenIdx);
745 llvm::Value *rhs = jit->builder->CreateShuffleVector(mult, undef, oddIdx);
746 return jit->builder->CreateAdd(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +0800747 }
748
Logan Chiene3191012018-08-24 22:01:50 +0800749 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
750 {
751 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
752 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
753
754 llvm::IntegerType *dstElemTy =
755 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
756
757 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
Ben Claytoneb50d252019-04-15 13:50:01 -0400758 ASSERT_MSG(truncNumBits < 64, "shift 64 must be handled separately. truncNumBits: %d", int(truncNumBits));
Logan Chiene3191012018-08-24 22:01:50 +0800759 llvm::Constant *max, *min;
760 if (isSigned)
761 {
762 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
763 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
764 }
765 else
766 {
767 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
768 min = llvm::ConstantInt::get(srcTy, 0, false);
769 }
770
771 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
772 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
773 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
774 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
775
Ben Clayton6f8e5652019-06-29 01:58:02 +0100776 x = jit->builder->CreateTrunc(x, dstTy);
777 y = jit->builder->CreateTrunc(y, dstTy);
Logan Chiene3191012018-08-24 22:01:50 +0800778
779 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
780 std::iota(index.begin(), index.end(), 0);
781
Ben Clayton6f8e5652019-06-29 01:58:02 +0100782 return jit->builder->CreateShuffleVector(x, y, index);
Logan Chiene3191012018-08-24 22:01:50 +0800783 }
784
785 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
786 {
787 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
788 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100789 llvm::Value *cmp = jit->builder->CreateICmpSLT(x, zero);
Logan Chiene3191012018-08-24 22:01:50 +0800790
Ben Clayton6f8e5652019-06-29 01:58:02 +0100791 llvm::Value *ret = jit->builder->CreateZExt(
792 jit->builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
Logan Chiene3191012018-08-24 22:01:50 +0800793 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
794 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100795 llvm::Value *elem = jit->builder->CreateZExt(
796 jit->builder->CreateExtractElement(cmp, i), retTy);
797 ret = jit->builder->CreateOr(ret, jit->builder->CreateShl(elem, i));
Logan Chiene3191012018-08-24 22:01:50 +0800798 }
799 return ret;
800 }
801
802 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
803 {
804 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
805 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100806 llvm::Value *cmp = jit->builder->CreateFCmpULT(x, zero);
Logan Chiene3191012018-08-24 22:01:50 +0800807
Ben Clayton6f8e5652019-06-29 01:58:02 +0100808 llvm::Value *ret = jit->builder->CreateZExt(
809 jit->builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
Logan Chiene3191012018-08-24 22:01:50 +0800810 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
811 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100812 llvm::Value *elem = jit->builder->CreateZExt(
813 jit->builder->CreateExtractElement(cmp, i), retTy);
814 ret = jit->builder->CreateOr(ret, jit->builder->CreateShl(elem, i));
Logan Chiene3191012018-08-24 22:01:50 +0800815 }
816 return ret;
817 }
818#endif // !defined(__i386__) && !defined(__x86_64__)
Chris Forbese86b6dc2019-03-01 09:08:47 -0800819
820 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
821 {
822 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
823 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
824
825 llvm::Value *extX, *extY;
826 if (sext)
827 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100828 extX = jit->builder->CreateSExt(x, extTy);
829 extY = jit->builder->CreateSExt(y, extTy);
Chris Forbese86b6dc2019-03-01 09:08:47 -0800830 }
831 else
832 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100833 extX = jit->builder->CreateZExt(x, extTy);
834 extY = jit->builder->CreateZExt(y, extTy);
Chris Forbese86b6dc2019-03-01 09:08:47 -0800835 }
836
Ben Clayton6f8e5652019-06-29 01:58:02 +0100837 llvm::Value *mult = jit->builder->CreateMul(extX, extY);
Chris Forbese86b6dc2019-03-01 09:08:47 -0800838
839 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
Ben Clayton6f8e5652019-06-29 01:58:02 +0100840 llvm::Value *mulh = jit->builder->CreateAShr(mult, intTy->getBitWidth());
841 return jit->builder->CreateTrunc(mulh, ty);
Chris Forbese86b6dc2019-03-01 09:08:47 -0800842 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400843}
844
Nicolas Capens48461502018-08-06 14:20:45 -0400845namespace rr
John Bauman89401822014-05-06 15:04:28 -0400846{
// Capability flags for this backend; both listed features are reported as
// supported. The per-field comments name the Capabilities members being set.
const Capabilities Caps =
{
	true, // CallSupported
	true, // CoroutinesSupported
};
852
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400853 static std::memory_order atomicOrdering(llvm::AtomicOrdering memoryOrder)
854 {
855 switch(memoryOrder)
856 {
857 case llvm::AtomicOrdering::Monotonic: return std::memory_order_relaxed; // https://llvm.org/docs/Atomics.html#monotonic
858 case llvm::AtomicOrdering::Acquire: return std::memory_order_acquire;
859 case llvm::AtomicOrdering::Release: return std::memory_order_release;
860 case llvm::AtomicOrdering::AcquireRelease: return std::memory_order_acq_rel;
861 case llvm::AtomicOrdering::SequentiallyConsistent: return std::memory_order_seq_cst;
862 default:
Ben Claytonfb280672019-04-25 11:16:15 +0100863 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400864 return std::memory_order_acq_rel;
865 }
866 }
867
868 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
869 {
870 if(!atomic)
871 {
872 return llvm::AtomicOrdering::NotAtomic;
873 }
874
875 switch(memoryOrder)
876 {
877 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
878 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
879 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
880 case std::memory_order_release: return llvm::AtomicOrdering::Release;
881 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
882 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
883 default:
884 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
885 return llvm::AtomicOrdering::AcquireRelease;
886 }
887 }
888
889 template <typename T>
890 static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
891 {
892 *reinterpret_cast<T*>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), atomicOrdering(ordering));
893 }
894
895 template <typename T>
896 static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
897 {
898 std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), *reinterpret_cast<T*>(val), atomicOrdering(ordering));
899 }
900
Chris Forbesfd4c96d2019-06-20 11:20:42 -0700901#ifdef __ANDROID__
// Applies an arbitrary binary operation to *ptr using a compare-and-swap
// retry loop: reads the current value, computes f(current, val), and tries to
// swap it in. Retries until the swap observes the value that was read.
// Returns the value *ptr held immediately before the successful swap.
template<typename F>
static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
{
	while(true)
	{
		const uint32_t observed = *ptr;
		const uint32_t replacement = f(observed, val);

		const uint32_t previous = __sync_val_compare_and_swap_4(ptr, observed, replacement);
		if(previous == observed)
		{
			return observed;
		}
	}
}
915#endif
916
Ben Clayton6f8e5652019-06-29 01:58:02 +0100917 void* resolveExternalSymbol(const char* name)
Logan Chien40a60052018-09-26 19:03:53 +0800918 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100919 struct Atomic
Logan Chien40a60052018-09-26 19:03:53 +0800920 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100921 static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400922 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100923 switch (size)
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400924 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100925 case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
926 case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
927 case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
928 case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
929 default:
930 UNIMPLEMENTED("Atomic::load(size: %d)", int(size));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400931 }
Ben Clayton6f8e5652019-06-29 01:58:02 +0100932 }
933 static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
Chris Forbes9283b252019-06-17 09:44:28 -0700934 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100935 switch (size)
936 {
937 case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
938 case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
939 case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
940 case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
941 default:
942 UNIMPLEMENTED("Atomic::store(size: %d)", int(size));
943 }
944 }
945 };
Chris Forbes9283b252019-06-17 09:44:28 -0700946
Ben Clayton6f8e5652019-06-29 01:58:02 +0100947 struct F
948 {
949 static void nop() {}
950 static void neverCalled() { UNREACHABLE("Should never be called"); }
951
952 static void* coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
953 static void coroutine_free_frame(void* ptr) { alignedFree(ptr); }
Ben Clayton40a885e2019-06-23 19:12:48 +0100954
Chris Forbes9283b252019-06-17 09:44:28 -0700955#ifdef __ANDROID__
Ben Clayton6f8e5652019-06-29 01:58:02 +0100956 // forwarders since we can't take address of builtins
957 static void sync_synchronize() { __sync_synchronize(); }
958 static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
959 static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
960 static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
961 static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
962 static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
963 static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
964 static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }
Chris Forbesfd4c96d2019-06-20 11:20:42 -0700965
Ben Clayton6f8e5652019-06-29 01:58:02 +0100966 static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val) { return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a,b);}); }
967 static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val) { return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a,b);}); }
968 static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val) { return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a,b);}); }
969 static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val) { return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a,b);}); }
Chris Forbes9283b252019-06-17 09:44:28 -0700970#endif
Ben Clayton6f8e5652019-06-29 01:58:02 +0100971 };
Ben Claytonac07ed82019-03-26 14:17:41 +0000972
Ben Clayton6f8e5652019-06-29 01:58:02 +0100973 class Resolver
974 {
975 public:
976 using FunctionMap = std::unordered_map<std::string, void *>;
Ben Claytonb5f1a132019-06-24 11:00:53 +0100977
Ben Clayton6f8e5652019-06-29 01:58:02 +0100978 FunctionMap functions;
Ben Claytonb5f1a132019-06-24 11:00:53 +0100979
Ben Clayton6f8e5652019-06-29 01:58:02 +0100980 Resolver()
981 {
982 functions.emplace("nop", reinterpret_cast<void*>(F::nop));
983 functions.emplace("floorf", reinterpret_cast<void*>(floorf));
984 functions.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
985 functions.emplace("truncf", reinterpret_cast<void*>(truncf));
986 functions.emplace("printf", reinterpret_cast<void*>(printf));
987 functions.emplace("puts", reinterpret_cast<void*>(puts));
988 functions.emplace("fmodf", reinterpret_cast<void*>(fmodf));
Ben Claytonb5f1a132019-06-24 11:00:53 +0100989
Ben Clayton6f8e5652019-06-29 01:58:02 +0100990 functions.emplace("sinf", reinterpret_cast<void*>(sinf));
991 functions.emplace("cosf", reinterpret_cast<void*>(cosf));
992 functions.emplace("asinf", reinterpret_cast<void*>(asinf));
993 functions.emplace("acosf", reinterpret_cast<void*>(acosf));
994 functions.emplace("atanf", reinterpret_cast<void*>(atanf));
995 functions.emplace("sinhf", reinterpret_cast<void*>(sinhf));
996 functions.emplace("coshf", reinterpret_cast<void*>(coshf));
997 functions.emplace("tanhf", reinterpret_cast<void*>(tanhf));
998 functions.emplace("asinhf", reinterpret_cast<void*>(asinhf));
999 functions.emplace("acoshf", reinterpret_cast<void*>(acoshf));
1000 functions.emplace("atanhf", reinterpret_cast<void*>(atanhf));
1001 functions.emplace("atan2f", reinterpret_cast<void*>(atan2f));
1002 functions.emplace("powf", reinterpret_cast<void*>(powf));
1003 functions.emplace("expf", reinterpret_cast<void*>(expf));
1004 functions.emplace("logf", reinterpret_cast<void*>(logf));
1005 functions.emplace("exp2f", reinterpret_cast<void*>(exp2f));
1006 functions.emplace("log2f", reinterpret_cast<void*>(log2f));
Ben Clayton14740062019-04-09 13:48:41 -04001007
Ben Clayton6f8e5652019-06-29 01:58:02 +01001008 functions.emplace("sin", reinterpret_cast<void*>(static_cast<double(*)(double)>(sin)));
1009 functions.emplace("cos", reinterpret_cast<void*>(static_cast<double(*)(double)>(cos)));
1010 functions.emplace("asin", reinterpret_cast<void*>(static_cast<double(*)(double)>(asin)));
1011 functions.emplace("acos", reinterpret_cast<void*>(static_cast<double(*)(double)>(acos)));
1012 functions.emplace("atan", reinterpret_cast<void*>(static_cast<double(*)(double)>(atan)));
1013 functions.emplace("sinh", reinterpret_cast<void*>(static_cast<double(*)(double)>(sinh)));
1014 functions.emplace("cosh", reinterpret_cast<void*>(static_cast<double(*)(double)>(cosh)));
1015 functions.emplace("tanh", reinterpret_cast<void*>(static_cast<double(*)(double)>(tanh)));
1016 functions.emplace("asinh", reinterpret_cast<void*>(static_cast<double(*)(double)>(asinh)));
1017 functions.emplace("acosh", reinterpret_cast<void*>(static_cast<double(*)(double)>(acosh)));
1018 functions.emplace("atanh", reinterpret_cast<void*>(static_cast<double(*)(double)>(atanh)));
1019 functions.emplace("atan2", reinterpret_cast<void*>(static_cast<double(*)(double,double)>(atan2)));
1020 functions.emplace("pow", reinterpret_cast<void*>(static_cast<double(*)(double,double)>(pow)));
1021 functions.emplace("exp", reinterpret_cast<void*>(static_cast<double(*)(double)>(exp)));
1022 functions.emplace("log", reinterpret_cast<void*>(static_cast<double(*)(double)>(log)));
1023 functions.emplace("exp2", reinterpret_cast<void*>(static_cast<double(*)(double)>(exp2)));
1024 functions.emplace("log2", reinterpret_cast<void*>(static_cast<double(*)(double)>(log2)));
1025
1026 functions.emplace("atomic_load", reinterpret_cast<void*>(Atomic::load));
1027 functions.emplace("atomic_store", reinterpret_cast<void*>(Atomic::store));
1028
1029 // FIXME (b/119409619): use an allocator here so we can control all memory allocations
1030 functions.emplace("coroutine_alloc_frame", reinterpret_cast<void*>(F::coroutine_alloc_frame));
1031 functions.emplace("coroutine_free_frame", reinterpret_cast<void*>(F::coroutine_free_frame));
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001032
Ben Clayton14740062019-04-09 13:48:41 -04001033#ifdef __APPLE__
Ben Clayton6f8e5652019-06-29 01:58:02 +01001034 functions.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
Ben Clayton14740062019-04-09 13:48:41 -04001035#elif defined(__linux__)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001036 functions.emplace("sincosf", reinterpret_cast<void*>(sincosf));
Ben Clayton2f58df32019-06-23 21:29:25 +01001037#elif defined(_WIN64)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001038 functions.emplace("chkstk", reinterpret_cast<void*>(__chkstk));
Ben Clayton2f58df32019-06-23 21:29:25 +01001039#elif defined(_WIN32)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001040 functions.emplace("chkstk", reinterpret_cast<void*>(_chkstk));
Ben Clayton2f58df32019-06-23 21:29:25 +01001041#endif
Chris Forbes9283b252019-06-17 09:44:28 -07001042
1043#ifdef __ANDROID__
Ben Clayton6f8e5652019-06-29 01:58:02 +01001044 functions.emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void*>(F::neverCalled));
1045 functions.emplace("sync_synchronize", reinterpret_cast<void*>(F::sync_synchronize));
1046 functions.emplace("sync_fetch_and_add_4", reinterpret_cast<void*>(F::sync_fetch_and_add_4));
1047 functions.emplace("sync_fetch_and_and_4", reinterpret_cast<void*>(F::sync_fetch_and_and_4));
1048 functions.emplace("sync_fetch_and_or_4", reinterpret_cast<void*>(F::sync_fetch_and_or_4));
1049 functions.emplace("sync_fetch_and_xor_4", reinterpret_cast<void*>(F::sync_fetch_and_xor_4));
1050 functions.emplace("sync_fetch_and_sub_4", reinterpret_cast<void*>(F::sync_fetch_and_sub_4));
1051 functions.emplace("sync_lock_test_and_set_4", reinterpret_cast<void*>(F::sync_lock_test_and_set_4));
1052 functions.emplace("sync_val_compare_and_swap_4", reinterpret_cast<void*>(F::sync_val_compare_and_swap_4));
1053 functions.emplace("sync_fetch_and_max_4", reinterpret_cast<void*>(F::sync_fetch_and_max_4));
1054 functions.emplace("sync_fetch_and_min_4", reinterpret_cast<void*>(F::sync_fetch_and_min_4));
1055 functions.emplace("sync_fetch_and_umax_4", reinterpret_cast<void*>(F::sync_fetch_and_umax_4));
1056 functions.emplace("sync_fetch_and_umin_4", reinterpret_cast<void*>(F::sync_fetch_and_umin_4));
1057 #endif
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001058 }
Ben Clayton6f8e5652019-06-29 01:58:02 +01001059 };
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001060
Ben Clayton6f8e5652019-06-29 01:58:02 +01001061 static Resolver resolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +08001062
Ben Clayton6f8e5652019-06-29 01:58:02 +01001063 // Trim off any underscores from the start of the symbol. LLVM likes
1064 // to append these on macOS.
1065 const char* trimmed = name;
1066 while (trimmed[0] == '_') { trimmed++; }
Ben Claytoncee3dff2019-05-22 12:01:22 +01001067
Ben Clayton6f8e5652019-06-29 01:58:02 +01001068 auto it = resolver.functions.find(trimmed);
1069 // Missing functions will likely make the module fail in exciting non-obvious ways.
1070 ASSERT_MSG(it != resolver.functions.end(), "Missing external function: '%s'", name);
1071 return it->second;
1072 }
Logan Chien52cde602018-09-03 19:37:57 +08001073
// The abstract Type* types are implemented as LLVM types, except that
// 64-bit vectors are emulated using 128-bit ones to avoid use of MMX in x86
// and VFP in ARM, and eliminate the overhead of converting them to explicit
// 128-bit ones. LLVM types are pointers, so we can represent emulated types
// as abstract pointers with small enum values.
enum InternalType : uintptr_t
{
	// Emulated types:
	Type_v2i32,
	Type_v4i16,
	Type_v2i16,
	Type_v8i8,
	Type_v4i8,
	Type_v2f32,
	// Number of emulated types above; any Type* value below this is one of them.
	EmulatedTypeCount,
	// Returned by asInternalType() to indicate that the abstract Type*
	// should be interpreted as LLVM type pointer:
	Type_LLVM
};
1093
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001094 inline InternalType asInternalType(Type *type)
1095 {
1096 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
1097 return (t < EmulatedTypeCount) ? t : Type_LLVM;
1098 }
1099
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001100 llvm::Type *T(Type *t)
1101 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001102 // Use 128-bit vectors to implement logically shorter ones.
1103 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001104 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001105 case Type_v2i32: return T(Int4::getType());
1106 case Type_v4i16: return T(Short8::getType());
1107 case Type_v2i16: return T(Short8::getType());
1108 case Type_v8i8: return T(Byte16::getType());
1109 case Type_v4i8: return T(Byte16::getType());
1110 case Type_v2f32: return T(Float4::getType());
1111 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
Ben Claytoneb50d252019-04-15 13:50:01 -04001112 default:
1113 UNREACHABLE("asInternalType(t): %d", int(asInternalType(t)));
1114 return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001115 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001116 }
1117
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001118 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001119 {
1120 return reinterpret_cast<Type*>(t);
1121 }
1122
Nicolas Capensac230122016-09-20 14:30:06 -04001123 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
1124 {
1125 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
1126 }
1127
Logan Chien191b3052018-08-31 16:57:15 +08001128 inline llvm::BasicBlock *B(BasicBlock *t)
1129 {
1130 return reinterpret_cast<llvm::BasicBlock*>(t);
1131 }
1132
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001133 inline BasicBlock *B(llvm::BasicBlock *t)
1134 {
1135 return reinterpret_cast<BasicBlock*>(t);
1136 }
1137
Nicolas Capens01a97962017-07-28 17:30:51 -04001138 static size_t typeSize(Type *type)
1139 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001140 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001141 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001142 case Type_v2i32: return 8;
1143 case Type_v4i16: return 8;
1144 case Type_v2i16: return 4;
1145 case Type_v8i8: return 8;
1146 case Type_v4i8: return 4;
1147 case Type_v2f32: return 8;
1148 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -04001149 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001150 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001151
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001152 if(t->isPointerTy())
1153 {
1154 return sizeof(void*);
1155 }
1156
1157 // At this point we should only have LLVM 'primitive' types.
1158 unsigned int bits = t->getPrimitiveSizeInBits();
Ben Claytoneb50d252019-04-15 13:50:01 -04001159 ASSERT_MSG(bits != 0, "bits: %d", int(bits));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001160
1161 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
1162 // but are typically stored as one byte. The DataLayout structure should
1163 // be used here and many other places if this assumption fails.
1164 return (bits + 7) / 8;
1165 }
1166 break;
1167 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001168 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001169 return 0;
1170 }
Nicolas Capens01a97962017-07-28 17:30:51 -04001171 }
1172
Nicolas Capens69674fb2017-09-01 11:08:44 -04001173 static unsigned int elementCount(Type *type)
1174 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001175 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -04001176 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001177 case Type_v2i32: return 2;
1178 case Type_v4i16: return 4;
1179 case Type_v2i16: return 2;
1180 case Type_v8i8: return 8;
1181 case Type_v4i8: return 4;
1182 case Type_v2f32: return 2;
1183 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
Ben Claytoneb50d252019-04-15 13:50:01 -04001184 default:
1185 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1186 return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -04001187 }
Nicolas Capens69674fb2017-09-01 11:08:44 -04001188 }
1189
Ben Claytonea38f952019-06-17 13:56:56 +01001190 static ::llvm::Function* createFunction(const char *name, ::llvm::Type *retTy, const std::vector<::llvm::Type*> &params)
1191 {
1192 llvm::FunctionType *functionType = llvm::FunctionType::get(retTy, params, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001193 auto func = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, name, jit->module.get());
Ben Claytonea38f952019-06-17 13:56:56 +01001194 func->setDoesNotThrow();
1195 func->setCallingConv(llvm::CallingConv::C);
Ben Claytonea38f952019-06-17 13:56:56 +01001196 return func;
1197 }
1198
John Bauman89401822014-05-06 15:04:28 -04001199 Nucleus::Nucleus()
1200 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -04001201 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -04001202
Ben Clayton6f8e5652019-06-29 01:58:02 +01001203 ASSERT(jit == nullptr);
Ben Clayton55bc37a2019-07-04 12:17:12 +01001204 jit.reset(new JITBuilder(Nucleus::getDefaultConfig()));
John Bauman89401822014-05-06 15:04:28 -04001205 }
1206
1207 Nucleus::~Nucleus()
1208 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001209 jit.reset();
Nicolas Capens3bbc5e12016-09-27 10:49:52 -04001210 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -04001211 }
1212
Ben Clayton55bc37a2019-07-04 12:17:12 +01001213 void Nucleus::setDefaultConfig(const Config &cfg)
John Bauman89401822014-05-06 15:04:28 -04001214 {
Ben Clayton55bc37a2019-07-04 12:17:12 +01001215 std::unique_lock<std::mutex> lock(::defaultConfigLock);
1216 ::defaultConfig() = cfg;
1217 }
1218
1219 void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
1220 {
1221 std::unique_lock<std::mutex> lock(::defaultConfigLock);
1222 auto &config = ::defaultConfig();
1223 config = cfgEdit.apply(config);
1224 }
1225
1226 Config Nucleus::getDefaultConfig()
1227 {
1228 std::unique_lock<std::mutex> lock(::defaultConfigLock);
1229 return ::defaultConfig();
1230 }
1231
Ben Clayton6897e9b2019-07-16 17:27:27 +01001232 std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
Ben Clayton55bc37a2019-07-04 12:17:12 +01001233 {
1234 auto cfg = cfgEdit.apply(jit->config);
1235
Ben Clayton6f8e5652019-06-29 01:58:02 +01001236 if(jit->builder->GetInsertBlock()->empty() || !jit->builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -04001237 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001238 llvm::Type *type = jit->function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -04001239
1240 if(type->isVoidTy())
1241 {
1242 createRetVoid();
1243 }
1244 else
1245 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001246 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -04001247 }
1248 }
John Bauman89401822014-05-06 15:04:28 -04001249
Ben Clayton97c13ad2019-05-02 11:59:30 +01001250#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton6f8e5652019-06-29 01:58:02 +01001251 if (jit->debugInfo != nullptr)
Ben Clayton97c13ad2019-05-02 11:59:30 +01001252 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001253 jit->debugInfo->Finalize();
Ben Clayton97c13ad2019-05-02 11:59:30 +01001254 }
1255#endif // ENABLE_RR_DEBUG_INFO
1256
John Bauman89401822014-05-06 15:04:28 -04001257 if(false)
1258 {
Ben Clayton5875be52019-04-11 14:57:40 -04001259 std::error_code error;
1260 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001261 jit->module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001262 }
1263
Ben Clayton5375f472019-06-24 13:33:11 +01001264#if defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
Ben Clayton4b944652019-05-02 10:56:19 +01001265 {
1266 llvm::legacy::PassManager pm;
1267 pm.add(llvm::createVerifierPass());
Ben Clayton6f8e5652019-06-29 01:58:02 +01001268 pm.run(*jit->module);
Ben Clayton4b944652019-05-02 10:56:19 +01001269 }
Ben Clayton5375f472019-06-24 13:33:11 +01001270#endif // defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
Ben Clayton4b944652019-05-02 10:56:19 +01001271
Ben Clayton55bc37a2019-07-04 12:17:12 +01001272 jit->optimize(cfg);
John Bauman89401822014-05-06 15:04:28 -04001273
1274 if(false)
1275 {
Ben Clayton5875be52019-04-11 14:57:40 -04001276 std::error_code error;
1277 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001278 jit->module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001279 }
1280
Ben Clayton55bc37a2019-07-04 12:17:12 +01001281 auto routine = jit->acquireRoutine(&jit->function, 1, cfg);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001282 jit.reset();
John Bauman89401822014-05-06 15:04:28 -04001283
John Bauman89401822014-05-06 15:04:28 -04001284 return routine;
1285 }
1286
John Bauman19bac1e2014-05-06 15:23:49 -04001287 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -04001288 {
1289 // Need to allocate it in the entry block for mem2reg to work
Ben Clayton6f8e5652019-06-29 01:58:02 +01001290 llvm::BasicBlock &entryBlock = jit->function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -04001291
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001292 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -04001293
1294 if(arraySize)
1295 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001296 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
John Bauman89401822014-05-06 15:04:28 -04001297 }
1298 else
1299 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001300 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
John Bauman89401822014-05-06 15:04:28 -04001301 }
1302
1303 entryBlock.getInstList().push_front(declaration);
1304
Nicolas Capens19336542016-09-26 10:32:29 -04001305 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -04001306 }
1307
1308 BasicBlock *Nucleus::createBasicBlock()
1309 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001310 return B(llvm::BasicBlock::Create(jit->context, "", jit->function));
John Bauman89401822014-05-06 15:04:28 -04001311 }
1312
1313 BasicBlock *Nucleus::getInsertBlock()
1314 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001315 return B(jit->builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -04001316 }
1317
1318 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1319 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001320 // assert(jit->builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -04001321
1322 Variable::materializeAll();
1323
Ben Clayton6f8e5652019-06-29 01:58:02 +01001324 jit->builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -04001325 }
1326
Nicolas Capensac230122016-09-20 14:30:06 -04001327 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -04001328 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001329 jit->function = rr::createFunction("", T(ReturnType), T(Params));
Nicolas Capens52551d12018-09-13 14:30:56 -04001330
Ben Claytonac07ed82019-03-26 14:17:41 +00001331#ifdef ENABLE_RR_DEBUG_INFO
Ben Claytone031f362019-07-20 12:35:40 +01001332 jit->debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(jit->builder.get(), &jit->context, jit->module.get(), jit->function));
Ben Claytonac07ed82019-03-26 14:17:41 +00001333#endif // ENABLE_RR_DEBUG_INFO
1334
Ben Clayton6f8e5652019-06-29 01:58:02 +01001335 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->function));
John Bauman89401822014-05-06 15:04:28 -04001336 }
1337
Nicolas Capens19336542016-09-26 10:32:29 -04001338 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -04001339 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001340 llvm::Function::arg_iterator args = jit->function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -04001341
1342 while(index)
1343 {
1344 args++;
1345 index--;
1346 }
1347
Nicolas Capens19336542016-09-26 10:32:29 -04001348 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001349 }
1350
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001351 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001352 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001353 RR_DEBUG_INFO_UPDATE_LOC();
1354
Ben Clayton6f8e5652019-06-29 01:58:02 +01001355 ASSERT_MSG(jit->function->getReturnType() == T(Void::getType()), "Return type mismatch");
Ben Claytonc958b172019-05-02 12:20:59 +01001356
Nicolas Capens0192d152019-03-27 14:46:07 -04001357 // Code generated after this point is unreachable, so any variables
1358 // being read can safely return an undefined value. We have to avoid
1359 // materializing variables after the terminator ret instruction.
1360 Variable::killUnmaterialized();
1361
Ben Clayton6f8e5652019-06-29 01:58:02 +01001362 jit->builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001363 }
1364
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001365 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001366 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001367 RR_DEBUG_INFO_UPDATE_LOC();
1368
Ben Clayton6f8e5652019-06-29 01:58:02 +01001369 ASSERT_MSG(jit->function->getReturnType() == V(v)->getType(), "Return type mismatch");
Ben Claytonc958b172019-05-02 12:20:59 +01001370
Nicolas Capens0192d152019-03-27 14:46:07 -04001371 // Code generated after this point is unreachable, so any variables
1372 // being read can safely return an undefined value. We have to avoid
1373 // materializing variables after the terminator ret instruction.
1374 Variable::killUnmaterialized();
1375
Ben Clayton6f8e5652019-06-29 01:58:02 +01001376 jit->builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001377 }
1378
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001379 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001380 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001381 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001382 Variable::materializeAll();
1383
Ben Clayton6f8e5652019-06-29 01:58:02 +01001384 jit->builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001385 }
1386
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001387 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001388 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001389 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001390 Variable::materializeAll();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001391 jit->builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001392 }
1393
1394 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1395 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001396 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001397 return V(jit->builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001398 }
1399
1400 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1401 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001402 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001403 return V(jit->builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001404 }
1405
1406 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1407 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001408 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001409 return V(jit->builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001410 }
1411
1412 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1413 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001414 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001415 return V(jit->builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001416 }
1417
1418 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1419 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001420 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001421 return V(jit->builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001422 }
1423
1424 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1425 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001426 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001427 return V(jit->builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001428 }
1429
1430 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1431 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001432 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001433 return V(jit->builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001434 }
1435
1436 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1437 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001438 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001439 return V(jit->builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001440 }
1441
1442 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1443 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001444 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001445 return V(jit->builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001446 }
1447
1448 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1449 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001450 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001451 return V(jit->builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001452 }
1453
1454 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1455 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001456 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001457 return V(jit->builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001458 }
1459
1460 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1461 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001462 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001463 return V(jit->builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001464 }
1465
1466 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1467 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001468 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001469 return V(jit->builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001470 }
1471
1472 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1473 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001474 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001475 return V(jit->builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001476 }
1477
1478 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1479 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001480 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001481 return V(jit->builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001482 }
1483
1484 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1485 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001486 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001487 return V(jit->builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001488 }
1489
1490 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1491 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001492 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001493 return V(jit->builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001494 }
1495
1496 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1497 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001498 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001499 return V(jit->builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001500 }
1501
Nicolas Capens19336542016-09-26 10:32:29 -04001502 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001503 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001504 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001505 return V(jit->builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001506 }
1507
Nicolas Capens19336542016-09-26 10:32:29 -04001508 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001509 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001510 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001511 return V(jit->builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001512 }
1513
Nicolas Capens19336542016-09-26 10:32:29 -04001514 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001515 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001516 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001517 return V(jit->builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001518 }
1519
Nicolas Capens86509d92019-03-21 13:23:50 -04001520 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001521 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001522 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001523 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001524 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001525 case Type_v2i32:
1526 case Type_v4i16:
1527 case Type_v8i8:
1528 case Type_v2f32:
1529 return createBitCast(
1530 createInsertElement(
1531 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001532 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001533 0),
1534 type);
1535 case Type_v2i16:
1536 case Type_v4i8:
1537 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001538 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001539 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001540 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001541 i = createZExt(i, Long::getType());
1542 Value *v = createInsertElement(u, i, 0);
1543 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001544 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001545 // Fallthrough to non-emulated case.
1546 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001547 {
Ben Clayton99e57192019-05-03 13:25:08 +01001548 auto elTy = T(type);
1549 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001550
1551 if (!atomic)
Ben Clayton99e57192019-05-03 13:25:08 +01001552 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001553 return V(jit->builder->CreateAlignedLoad(V(ptr), alignment, isVolatile));
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001554 }
1555 else if (elTy->isIntegerTy() || elTy->isPointerTy())
1556 {
1557 // Integers and pointers can be atomically loaded by setting
1558 // the ordering constraint on the load instruction.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001559 auto load = jit->builder->CreateAlignedLoad(V(ptr), alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001560 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1561 return V(load);
1562 }
1563 else if (elTy->isFloatTy() || elTy->isDoubleTy())
1564 {
1565 // LLVM claims to support atomic loads of float types as
1566 // above, but certain backends cannot deal with this.
1567 // Load as an integer and bitcast. See b/136037244.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001568 auto size = jit->module->getDataLayout().getTypeStoreSize(elTy);
1569 auto elAsIntTy = ::llvm::IntegerType::get(jit->context, size * 8);
1570 auto ptrCast = jit->builder->CreatePointerCast(V(ptr), elAsIntTy->getPointerTo());
1571 auto load = jit->builder->CreateAlignedLoad(ptrCast, alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001572 load->setAtomic(atomicOrdering(atomic, memoryOrder));
Ben Clayton6f8e5652019-06-29 01:58:02 +01001573 auto loadCast = jit->builder->CreateBitCast(load, elTy);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001574 return V(loadCast);
1575 }
1576 else
1577 {
1578 // More exotic types require falling back to the extern:
1579 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001580 auto sizetTy = ::llvm::IntegerType::get(jit->context, sizeof(size_t) * 8);
1581 auto intTy = ::llvm::IntegerType::get(jit->context, sizeof(int) * 8);
1582 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton99e57192019-05-03 13:25:08 +01001583 auto i8PtrTy = i8Ty->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001584 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
Ben Clayton99e57192019-05-03 13:25:08 +01001585 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001586 auto func = jit->module->getOrInsertFunction("__atomic_load", funcTy);
1587 auto size = jit->module->getDataLayout().getTypeStoreSize(elTy);
Ben Clayton99e57192019-05-03 13:25:08 +01001588 auto out = allocateStackVariable(type);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001589 jit->builder->CreateCall(func, {
Ben Clayton99e57192019-05-03 13:25:08 +01001590 ::llvm::ConstantInt::get(sizetTy, size),
Ben Clayton6f8e5652019-06-29 01:58:02 +01001591 jit->builder->CreatePointerCast(V(ptr), i8PtrTy),
1592 jit->builder->CreatePointerCast(V(out), i8PtrTy),
Ben Clayton99e57192019-05-03 13:25:08 +01001593 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1594 });
Ben Clayton6f8e5652019-06-29 01:58:02 +01001595 return V(jit->builder->CreateLoad(V(out)));
Ben Clayton99e57192019-05-03 13:25:08 +01001596 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001597 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001598 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001599 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1600 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001601 }
John Bauman89401822014-05-06 15:04:28 -04001602 }
1603
Nicolas Capens86509d92019-03-21 13:23:50 -04001604 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001605 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001606 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001607 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001608 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001609 case Type_v2i32:
1610 case Type_v4i16:
1611 case Type_v8i8:
1612 case Type_v2f32:
1613 createStore(
1614 createExtractElement(
1615 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1616 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001617 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001618 return value;
1619 case Type_v2i16:
1620 case Type_v4i8:
1621 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001622 {
Logan Chien191b3052018-08-31 16:57:15 +08001623 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001624 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1625 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001626 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001627 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001628 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001629 // Fallthrough to non-emulated case.
1630 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001631 {
Ben Clayton99e57192019-05-03 13:25:08 +01001632 auto elTy = T(type);
1633 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001634
1635 if (!atomic)
Ben Clayton99e57192019-05-03 13:25:08 +01001636 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001637 jit->builder->CreateAlignedStore(V(value), V(ptr), alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001638 }
1639 else if (elTy->isIntegerTy() || elTy->isPointerTy())
1640 {
1641 // Integers and pointers can be atomically stored by setting
1642 // the ordering constraint on the store instruction.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001643 auto store = jit->builder->CreateAlignedStore(V(value), V(ptr), alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001644 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1645 }
1646 else if (elTy->isFloatTy() || elTy->isDoubleTy())
1647 {
1648 // LLVM claims to support atomic stores of float types as
1649 // above, but certain backends cannot deal with this.
1650 // Store as an bitcast integer. See b/136037244.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001651 auto size = jit->module->getDataLayout().getTypeStoreSize(elTy);
1652 auto elAsIntTy = ::llvm::IntegerType::get(jit->context, size * 8);
1653 auto valCast = jit->builder->CreateBitCast(V(value), elAsIntTy);
1654 auto ptrCast = jit->builder->CreatePointerCast(V(ptr), elAsIntTy->getPointerTo());
1655 auto store = jit->builder->CreateAlignedStore(valCast, ptrCast, alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001656 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1657 }
1658 else
1659 {
1660 // More exotic types require falling back to the extern:
1661 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001662 auto sizetTy = ::llvm::IntegerType::get(jit->context, sizeof(size_t) * 8);
1663 auto intTy = ::llvm::IntegerType::get(jit->context, sizeof(int) * 8);
1664 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton99e57192019-05-03 13:25:08 +01001665 auto i8PtrTy = i8Ty->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001666 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
Ben Clayton99e57192019-05-03 13:25:08 +01001667 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001668 auto func = jit->module->getOrInsertFunction("__atomic_store", funcTy);
1669 auto size = jit->module->getDataLayout().getTypeStoreSize(elTy);
Ben Clayton99e57192019-05-03 13:25:08 +01001670 auto copy = allocateStackVariable(type);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001671 jit->builder->CreateStore(V(value), V(copy));
1672 jit->builder->CreateCall(func, {
Ben Clayton99e57192019-05-03 13:25:08 +01001673 ::llvm::ConstantInt::get(sizetTy, size),
Ben Clayton6f8e5652019-06-29 01:58:02 +01001674 jit->builder->CreatePointerCast(V(ptr), i8PtrTy),
1675 jit->builder->CreatePointerCast(V(copy), i8PtrTy),
Ben Clayton99e57192019-05-03 13:25:08 +01001676 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1677 });
1678 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001679
1680 return value;
1681 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001682 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001683 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1684 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001685 }
John Bauman89401822014-05-06 15:04:28 -04001686 }
1687
Ben Clayton0b00b952019-07-03 15:51:19 +01001688 Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001689 {
1690 ASSERT(V(ptr)->getType()->isPointerTy());
1691 ASSERT(V(mask)->getType()->isVectorTy());
1692
1693 auto numEls = V(mask)->getType()->getVectorNumElements();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001694 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
1695 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001696 auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
1697 auto elVecPtrTy = elVecTy->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001698 auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
Ben Clayton0b00b952019-07-03 15:51:19 +01001699 auto passthrough = zeroMaskedLanes ? ::llvm::Constant::getNullValue(elVecTy) : llvm::UndefValue::get(elVecTy);
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001700 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001701 auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_load, { elVecTy, elVecPtrTy } );
1702 return V(jit->builder->CreateCall(func, { V(ptr), align, i8Mask, passthrough }));
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001703 }
1704
1705 void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
1706 {
1707 ASSERT(V(ptr)->getType()->isPointerTy());
1708 ASSERT(V(val)->getType()->isVectorTy());
1709 ASSERT(V(mask)->getType()->isVectorTy());
1710
1711 auto numEls = V(mask)->getType()->getVectorNumElements();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001712 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
1713 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001714 auto elVecTy = V(val)->getType();
1715 auto elVecPtrTy = elVecTy->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001716 auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001717 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001718 auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_store, { elVecTy, elVecPtrTy } );
1719 jit->builder->CreateCall(func, { V(val), V(ptr), align, i8Mask });
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001720 }
1721
Ben Clayton0b00b952019-07-03 15:51:19 +01001722 Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
Ben Clayton0fc611f2019-04-18 11:23:27 -04001723 {
1724 ASSERT(V(base)->getType()->isPointerTy());
1725 ASSERT(V(offsets)->getType()->isVectorTy());
1726 ASSERT(V(mask)->getType()->isVectorTy());
1727
1728 auto numEls = V(mask)->getType()->getVectorNumElements();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001729 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
1730 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
1731 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton0fc611f2019-04-18 11:23:27 -04001732 auto i8PtrTy = i8Ty->getPointerTo();
1733 auto elPtrTy = T(elTy)->getPointerTo();
1734 auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
1735 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001736 auto i8Base = jit->builder->CreatePointerCast(V(base), i8PtrTy);
1737 auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets));
1738 auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1739 auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
Ben Clayton0b00b952019-07-03 15:51:19 +01001740 auto passthrough = zeroMaskedLanes ? ::llvm::Constant::getNullValue(elVecTy) : llvm::UndefValue::get(elVecTy);
Ben Clayton0fc611f2019-04-18 11:23:27 -04001741 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001742 auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
1743 return V(jit->builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
Ben Clayton0fc611f2019-04-18 11:23:27 -04001744 }
1745
1746 void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment)
1747 {
1748 ASSERT(V(base)->getType()->isPointerTy());
1749 ASSERT(V(val)->getType()->isVectorTy());
1750 ASSERT(V(offsets)->getType()->isVectorTy());
1751 ASSERT(V(mask)->getType()->isVectorTy());
1752
1753 auto numEls = V(mask)->getType()->getVectorNumElements();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001754 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
1755 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
1756 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton0fc611f2019-04-18 11:23:27 -04001757 auto i8PtrTy = i8Ty->getPointerTo();
1758 auto elVecTy = V(val)->getType();
1759 auto elTy = elVecTy->getVectorElementType();
1760 auto elPtrTy = elTy->getPointerTo();
1761 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001762 auto i8Base = jit->builder->CreatePointerCast(V(base), i8PtrTy);
1763 auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets));
1764 auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1765 auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
Ben Clayton0fc611f2019-04-18 11:23:27 -04001766 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001767 auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
1768 jit->builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
Ben Clayton0fc611f2019-04-18 11:23:27 -04001769 }
1770
Ben Claytonb16c5862019-05-08 14:01:38 +01001771 void Nucleus::createFence(std::memory_order memoryOrder)
1772 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001773 jit->builder->CreateFence(atomicOrdering(true, memoryOrder));
Ben Claytonb16c5862019-05-08 14:01:38 +01001774 }
1775
Nicolas Capensd294def2017-01-26 17:44:37 -08001776 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001777 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001778 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001779 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens01a97962017-07-28 17:30:51 -04001780 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001781 {
Ben Claytonb1243732019-02-27 23:56:18 +00001782 // LLVM manual: "When indexing into an array, pointer or vector,
1783 // integers of any width are allowed, and they are not required to
1784 // be constant. These integers are treated as signed values where
1785 // relevant."
1786 //
1787 // Thus if we want indexes to be treated as unsigned we have to
1788 // zero-extend them ourselves.
1789 //
1790 // Note that this is not because we want to address anywhere near
1791 // 4 GB of data. Instead this is important for performance because
1792 // x86 supports automatic zero-extending of 32-bit registers to
1793 // 64-bit. Thus when indexing into an array using a uint32 is
1794 // actually faster than an int32.
1795 index = unsignedIndex ?
1796 createZExt(index, Long::getType()) :
1797 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001798 }
Ben Claytonb1243732019-02-27 23:56:18 +00001799
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001800 // For non-emulated types we can rely on LLVM's GEP to calculate the
1801 // effective address correctly.
1802 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001803 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001804 return V(jit->builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001805 }
1806
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001807 // For emulated types we have to multiply the index by the intended
1808 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001809 index = (sizeof(void*) == 8) ?
1810 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1811 createMul(index, createConstantInt((int)typeSize(type)));
1812
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001813 // Cast to a byte pointer, apply the byte offset, and cast back to the
1814 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001815 return createBitCast(
Ben Clayton6f8e5652019-06-29 01:58:02 +01001816 V(jit->builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
Logan Chien191b3052018-08-31 16:57:15 +08001817 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001818 }
1819
Chris Forbes17813932019-04-18 11:45:54 -07001820 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
John Bauman19bac1e2014-05-06 15:23:49 -04001821 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001822 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001823 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001824 }
1825
Chris Forbes707ed992019-04-18 18:17:35 -07001826 Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1827 {
1828 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001829 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Sub, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes707ed992019-04-18 18:17:35 -07001830 }
1831
Chris Forbes17813932019-04-18 11:45:54 -07001832 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1833 {
1834 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001835 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::And, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001836 }
1837
1838 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1839 {
1840 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001841 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Or, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001842 }
1843
1844 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1845 {
1846 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001847 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xor, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001848 }
1849
1850 Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1851 {
1852 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001853 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Min, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001854 }
1855
1856 Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1857 {
1858 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001859 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Max, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001860 }
1861
Chris Forbesf31bdad2019-05-23 14:58:08 -07001862 Value *Nucleus::createAtomicUMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1863 {
1864 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001865 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMin, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbesf31bdad2019-05-23 14:58:08 -07001866 }
1867
1868 Value *Nucleus::createAtomicUMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1869 {
1870 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001871 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMax, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbesf31bdad2019-05-23 14:58:08 -07001872 }
1873
1874
Chris Forbes17813932019-04-18 11:45:54 -07001875 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1876 {
1877 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001878 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
John Bauman19bac1e2014-05-06 15:23:49 -04001879 }
1880
Chris Forbesa16238d2019-04-18 16:31:54 -07001881 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1882 {
1883 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbesc9ca99e2019-04-19 07:53:34 -07001884 // Note: AtomicCmpXchgInstruction returns a 2-member struct containing {result, success-flag}, not the result directly.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001885 return V(jit->builder->CreateExtractValue(
1886 jit->builder->CreateAtomicCmpXchg(V(ptr), V(compare), V(value), atomicOrdering(true, memoryOrderEqual), atomicOrdering(true, memoryOrderUnequal)),
Chris Forbesa16238d2019-04-18 16:31:54 -07001887 llvm::ArrayRef<unsigned>(0u)));
1888 }
1889
Nicolas Capens19336542016-09-26 10:32:29 -04001890 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001891 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001892 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001893 return V(jit->builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001894 }
1895
Nicolas Capens19336542016-09-26 10:32:29 -04001896 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001897 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001898 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001899 return V(jit->builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001900 }
1901
Nicolas Capens19336542016-09-26 10:32:29 -04001902 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001903 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001904 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001905 return V(jit->builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001906 }
1907
Nicolas Capens19336542016-09-26 10:32:29 -04001908 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001909 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001910 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001911 return V(jit->builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001912 }
1913
Nicolas Capens19336542016-09-26 10:32:29 -04001914 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001915 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001916 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001917 return V(jit->builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001918 }
1919
Nicolas Capens19336542016-09-26 10:32:29 -04001920 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001921 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001922 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001923 return V(jit->builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001924 }
1925
Nicolas Capens19336542016-09-26 10:32:29 -04001926 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001927 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001928 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001929 return V(jit->builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001930 }
1931
Nicolas Capens19336542016-09-26 10:32:29 -04001932 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001933 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001934 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04001935 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1936 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1937 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001938 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001939 {
1940 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001941 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1942 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001943 return createLoad(readAddress, destType);
1944 }
Logan Chien191b3052018-08-31 16:57:15 +08001945 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001946 {
Logan Chien191b3052018-08-31 16:57:15 +08001947 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1948 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001949 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1950 return createLoad(readAddress, destType);
1951 }
1952
Ben Clayton6f8e5652019-06-29 01:58:02 +01001953 return V(jit->builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001954 }
1955
John Bauman89401822014-05-06 15:04:28 -04001956 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1957 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001958 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001959 return V(jit->builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001960 }
1961
1962 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1963 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001964 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001965 return V(jit->builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001966 }
1967
1968 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1969 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001970 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001971 return V(jit->builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001972 }
1973
1974 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1975 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001976 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001977 return V(jit->builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001978 }
1979
1980 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1981 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001982 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001983 return V(jit->builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001984 }
1985
1986 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1987 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001988 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001989 return V(jit->builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001990 }
1991
1992 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1993 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001994 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001995 return V(jit->builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001996 }
1997
1998 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1999 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002000 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002001 return V(jit->builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002002 }
2003
2004 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
2005 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002006 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002007 return V(jit->builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002008 }
2009
2010 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
2011 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002012 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002013 return V(jit->builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002014 }
2015
2016 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
2017 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002018 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002019 return V(jit->builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002020 }
2021
2022 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
2023 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002024 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002025 return V(jit->builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002026 }
2027
2028 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
2029 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002030 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002031 return V(jit->builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002032 }
2033
2034 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
2035 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002036 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002037 return V(jit->builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002038 }
2039
2040 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
2041 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002042 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002043 return V(jit->builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002044 }
2045
2046 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
2047 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002048 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002049 return V(jit->builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002050 }
2051
2052 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
2053 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002054 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002055 return V(jit->builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002056 }
2057
2058 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
2059 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002060 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002061 return V(jit->builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002062 }
2063
2064 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
2065 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002066 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002067 return V(jit->builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002068 }
2069
2070 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
2071 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002072 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002073 return V(jit->builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002074 }
2075
2076 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
2077 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002078 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002079 return V(jit->builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002080 }
2081
2082 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
2083 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002084 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002085 return V(jit->builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002086 }
2087
2088 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
2089 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002090 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002091 return V(jit->builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002092 }
2093
2094 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
2095 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002096 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002097 return V(jit->builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002098 }
2099
Nicolas Capense95d5342016-09-30 11:37:28 -04002100 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04002101 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002102 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04002103 ASSERT(V(vector)->getType()->getContainedType(0) == T(type));
Ben Clayton6f8e5652019-06-29 01:58:02 +01002104 return V(jit->builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04002105 }
2106
2107 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
2108 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002109 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002110 return V(jit->builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04002111 }
2112
Logan Chien191b3052018-08-31 16:57:15 +08002113 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04002114 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002115 RR_DEBUG_INFO_UPDATE_LOC();
2116
Logan Chien191b3052018-08-31 16:57:15 +08002117 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04002118 const int maxSize = 16;
2119 llvm::Constant *swizzle[maxSize];
Ben Claytoneb50d252019-04-15 13:50:01 -04002120 ASSERT(size <= maxSize);
Nicolas Capense89cd582016-09-30 14:23:47 -04002121
2122 for(int i = 0; i < size; i++)
2123 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002124 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(jit->context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04002125 }
2126
2127 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
2128
Ben Clayton6f8e5652019-06-29 01:58:02 +01002129 return V(jit->builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04002130 }
2131
Logan Chien191b3052018-08-31 16:57:15 +08002132 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04002133 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002134 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002135 return V(jit->builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04002136 }
2137
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05002138 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04002139 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002140 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002141 return reinterpret_cast<SwitchCases*>(jit->builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04002142 }
2143
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05002144 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04002145 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002146 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002147 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
Ben Clayton6f8e5652019-06-29 01:58:02 +01002148 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(jit->context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04002149 }
2150
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04002151 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04002152 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002153 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002154 jit->builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04002155 }
2156
Nicolas Capensac230122016-09-20 14:30:06 -04002157 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04002158 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002159 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04002160 }
2161
Nicolas Capens13ac2322016-10-13 14:52:12 -04002162 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04002163 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002164 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002165 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04002166 }
2167
Nicolas Capens13ac2322016-10-13 14:52:12 -04002168 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04002169 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002170 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002171 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(jit->context), i, true));
John Bauman89401822014-05-06 15:04:28 -04002172 }
2173
Nicolas Capens13ac2322016-10-13 14:52:12 -04002174 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04002175 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002176 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002177 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(jit->context), i, true));
John Bauman89401822014-05-06 15:04:28 -04002178 }
2179
Nicolas Capens13ac2322016-10-13 14:52:12 -04002180 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04002181 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002182 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002183 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(jit->context), i, false));
John Bauman89401822014-05-06 15:04:28 -04002184 }
2185
Nicolas Capens13ac2322016-10-13 14:52:12 -04002186 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04002187 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002188 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002189 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(jit->context), b));
John Bauman89401822014-05-06 15:04:28 -04002190 }
2191
Nicolas Capens13ac2322016-10-13 14:52:12 -04002192 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04002193 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002194 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002195 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(jit->context), i, true));
John Bauman89401822014-05-06 15:04:28 -04002196 }
2197
Nicolas Capens13ac2322016-10-13 14:52:12 -04002198 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04002199 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002200 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002201 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(jit->context), i, false));
John Bauman89401822014-05-06 15:04:28 -04002202 }
2203
Nicolas Capens13ac2322016-10-13 14:52:12 -04002204 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04002205 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002206 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002207 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(jit->context), i, true));
John Bauman89401822014-05-06 15:04:28 -04002208 }
2209
Nicolas Capens13ac2322016-10-13 14:52:12 -04002210 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04002211 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002212 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002213 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(jit->context), i, false));
John Bauman89401822014-05-06 15:04:28 -04002214 }
2215
Nicolas Capens13ac2322016-10-13 14:52:12 -04002216 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04002217 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002218 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002219 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04002220 }
2221
Nicolas Capens13ac2322016-10-13 14:52:12 -04002222 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04002223 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002224 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002225 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04002226 }
2227
Nicolas Capens13ac2322016-10-13 14:52:12 -04002228 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04002229 {
Ben Claytoneb50d252019-04-15 13:50:01 -04002230 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04002231 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
2232 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04002233 ASSERT(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04002234 llvm::Constant *constantVector[16];
2235
Nicolas Capens69674fb2017-09-01 11:08:44 -04002236 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04002237 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04002238 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04002239 }
2240
Nicolas Capens69674fb2017-09-01 11:08:44 -04002241 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04002242 }
2243
2244 Value *Nucleus::createConstantVector(const double *constants, Type *type)
2245 {
Ben Claytoneb50d252019-04-15 13:50:01 -04002246 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04002247 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
2248 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04002249 ASSERT(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04002250 llvm::Constant *constantVector[8];
2251
Nicolas Capens69674fb2017-09-01 11:08:44 -04002252 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04002253 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04002254 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04002255 }
2256
Nicolas Capens69674fb2017-09-01 11:08:44 -04002257 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04002258 }
2259
John Bauman19bac1e2014-05-06 15:23:49 -04002260 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04002261 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002262 return T(llvm::Type::getVoidTy(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002263 }
2264
John Bauman19bac1e2014-05-06 15:23:49 -04002265 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04002266 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002267 return T(llvm::Type::getInt1Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002268 }
2269
John Bauman19bac1e2014-05-06 15:23:49 -04002270 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04002271 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002272 return T(llvm::Type::getInt8Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002273 }
2274
John Bauman19bac1e2014-05-06 15:23:49 -04002275 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04002276 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002277 return T(llvm::Type::getInt8Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002278 }
2279
John Bauman19bac1e2014-05-06 15:23:49 -04002280 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04002281 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002282 return T(llvm::Type::getInt16Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002283 }
2284
John Bauman19bac1e2014-05-06 15:23:49 -04002285 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04002286 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002287 return T(llvm::Type::getInt16Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002288 }
2289
John Bauman19bac1e2014-05-06 15:23:49 -04002290 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04002291 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002292 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04002293 }
2294
John Bauman19bac1e2014-05-06 15:23:49 -04002295 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04002296 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002297 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04002298 }
2299
John Bauman19bac1e2014-05-06 15:23:49 -04002300 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002301 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002302 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002303#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002304 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002305#else
2306 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2307#endif
John Bauman89401822014-05-06 15:04:28 -04002308 }
John Bauman66b8ab22014-05-06 15:57:45 -04002309
John Bauman19bac1e2014-05-06 15:23:49 -04002310 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002311 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002312 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002313#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002314 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002315#else
2316 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2317#endif
John Bauman89401822014-05-06 15:04:28 -04002318 }
2319
John Bauman19bac1e2014-05-06 15:23:49 -04002320 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04002321 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002322 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002323#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002324 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08002325#else
2326 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2327#endif
John Bauman89401822014-05-06 15:04:28 -04002328 }
2329
John Bauman19bac1e2014-05-06 15:23:49 -04002330// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002331// {
Logan Chiene3191012018-08-24 22:01:50 +08002332//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002333// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08002334//#else
2335// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2336//#endif
John Bauman89401822014-05-06 15:04:28 -04002337// }
John Bauman66b8ab22014-05-06 15:57:45 -04002338
John Bauman19bac1e2014-05-06 15:23:49 -04002339 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002340 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002341 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002342#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002343 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002344#else
2345 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2346#endif
John Bauman89401822014-05-06 15:04:28 -04002347 }
2348
John Bauman19bac1e2014-05-06 15:23:49 -04002349 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002350 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002351 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002352 }
2353
John Bauman19bac1e2014-05-06 15:23:49 -04002354 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002355 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002356 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002357#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002358 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002359#else
2360 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2361#endif
John Bauman89401822014-05-06 15:04:28 -04002362 }
John Bauman66b8ab22014-05-06 15:57:45 -04002363
John Bauman19bac1e2014-05-06 15:23:49 -04002364 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002365 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002366 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002367#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002368 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002369#else
2370 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2371#endif
John Bauman89401822014-05-06 15:04:28 -04002372 }
2373
John Bauman19bac1e2014-05-06 15:23:49 -04002374 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04002375 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002376 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002377#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002378 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002379#else
2380 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2381#endif
John Bauman89401822014-05-06 15:04:28 -04002382 }
2383
John Bauman19bac1e2014-05-06 15:23:49 -04002384 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002385 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002386 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002387#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002388 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002389#else
2390 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2391#endif
John Bauman89401822014-05-06 15:04:28 -04002392 }
John Bauman66b8ab22014-05-06 15:57:45 -04002393
John Bauman19bac1e2014-05-06 15:23:49 -04002394 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002395 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002396 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002397#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002398 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08002399#else
2400 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2401#endif
John Bauman89401822014-05-06 15:04:28 -04002402 }
2403
John Bauman19bac1e2014-05-06 15:23:49 -04002404 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002405 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002406 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002407 }
2408
John Bauman19bac1e2014-05-06 15:23:49 -04002409 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002410 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002411 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002412 }
2413
John Bauman19bac1e2014-05-06 15:23:49 -04002414 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002415 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002416 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002417 }
2418
Nicolas Capens16b5f152016-10-13 13:39:01 -04002419 Type *Short2::getType()
2420 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002421 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002422 }
2423
Nicolas Capens16b5f152016-10-13 13:39:01 -04002424 Type *UShort2::getType()
2425 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002426 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002427 }
2428
John Bauman19bac1e2014-05-06 15:23:49 -04002429 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04002430 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002431 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04002432 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04002433 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2434
Nicolas Capens01a97962017-07-28 17:30:51 -04002435 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2436 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04002437
John Bauman66b8ab22014-05-06 15:57:45 -04002438 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04002439 }
2440
John Bauman19bac1e2014-05-06 15:23:49 -04002441// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002442// {
2443// }
2444
John Bauman19bac1e2014-05-06 15:23:49 -04002445 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002446 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002447 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002448 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002449#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002450 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08002451#else
2452 Value *v = v4i32.loadValue();
2453 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
2454#endif
John Bauman66b8ab22014-05-06 15:57:45 -04002455
2456 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04002457 }
2458
John Bauman19bac1e2014-05-06 15:23:49 -04002459 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002460 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002461 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002462#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002463 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2464
2465 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002466#else
2467 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
2468#endif
John Bauman89401822014-05-06 15:04:28 -04002469 }
2470
John Bauman19bac1e2014-05-06 15:23:49 -04002471 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002472 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002473 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002474#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002475 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002476#else
2477 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2478#endif
John Bauman89401822014-05-06 15:04:28 -04002479 }
2480
John Bauman19bac1e2014-05-06 15:23:49 -04002481 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002482 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002483 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002484#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002485 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002486#else
2487 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
2488#endif
John Bauman89401822014-05-06 15:04:28 -04002489 }
2490
John Bauman19bac1e2014-05-06 15:23:49 -04002491 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002492 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002493 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002494#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002495 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002496#else
2497 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
2498#endif
John Bauman89401822014-05-06 15:04:28 -04002499 }
2500
John Bauman19bac1e2014-05-06 15:23:49 -04002501 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002502 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002503 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002504#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002505 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002506#else
2507 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2508#endif
John Bauman89401822014-05-06 15:04:28 -04002509 }
2510
John Bauman19bac1e2014-05-06 15:23:49 -04002511 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002512 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002513 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002514#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002515 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002516#else
2517 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2518#endif
John Bauman89401822014-05-06 15:04:28 -04002519 }
2520
John Bauman19bac1e2014-05-06 15:23:49 -04002521 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002522 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002523 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002524#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002525 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002526#else
2527 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2528#endif
John Bauman89401822014-05-06 15:04:28 -04002529 }
2530
John Bauman19bac1e2014-05-06 15:23:49 -04002531 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002532 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002533 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002534#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002535 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002536#else
2537 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
2538#endif
John Bauman89401822014-05-06 15:04:28 -04002539 }
2540
Nicolas Capens33438a62017-09-27 11:47:35 -04002541 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002542 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002543 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002544#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002545 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002546#else
2547 auto result = V(lowerPack(V(x.value), V(y.value), true));
2548#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04002549 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04002550 }
2551
Nicolas Capens33438a62017-09-27 11:47:35 -04002552 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2553 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002554 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002555#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002556 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002557#else
2558 auto result = V(lowerPack(V(x.value), V(y.value), false));
2559#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002560 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
2561 }
2562
John Bauman19bac1e2014-05-06 15:23:49 -04002563 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002564 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002565 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002566#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002567 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002568#else
2569 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
2570#endif
John Bauman89401822014-05-06 15:04:28 -04002571 }
2572
John Bauman19bac1e2014-05-06 15:23:49 -04002573 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002574 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002575 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002576#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002577 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002578#else
2579 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
2580#endif
John Bauman89401822014-05-06 15:04:28 -04002581 }
2582
John Bauman19bac1e2014-05-06 15:23:49 -04002583 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04002584 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002585 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002586 }
2587
John Bauman19bac1e2014-05-06 15:23:49 -04002588 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04002589 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002590 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002591 if(saturate)
2592 {
Logan Chiena8385ed2018-09-26 19:22:54 +08002593#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002594 if(CPUID::supportsSSE4_1())
2595 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002596 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002597 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002598 }
2599 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002600#endif
John Bauman89401822014-05-06 15:04:28 -04002601 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002602 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002603 }
2604 }
2605 else
2606 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002607 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002608 }
2609 }
2610
John Bauman19bac1e2014-05-06 15:23:49 -04002611 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002612 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002613 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002614#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002615 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2616
2617 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002618#else
2619 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2620#endif
John Bauman89401822014-05-06 15:04:28 -04002621 }
2622
John Bauman19bac1e2014-05-06 15:23:49 -04002623 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002624 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002625 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002626#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002627 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2628
2629 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002630#else
2631 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2632#endif
John Bauman89401822014-05-06 15:04:28 -04002633 }
2634
John Bauman19bac1e2014-05-06 15:23:49 -04002635 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002636 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002637 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002638 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002639 }
2640
John Bauman19bac1e2014-05-06 15:23:49 -04002641 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002642 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002643 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002644 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002645 }
2646
John Bauman19bac1e2014-05-06 15:23:49 -04002647 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002648 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002649 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002650#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002651 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002652#else
2653 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2654#endif
John Bauman89401822014-05-06 15:04:28 -04002655 }
2656
John Bauman19bac1e2014-05-06 15:23:49 -04002657 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002658 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002659 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002660#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002661 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002662#else
2663 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2664#endif
John Bauman89401822014-05-06 15:04:28 -04002665 }
2666
John Bauman19bac1e2014-05-06 15:23:49 -04002667 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002668 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002669 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002670#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002671 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002672#else
2673 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2674#endif
John Bauman89401822014-05-06 15:04:28 -04002675 }
2676
John Bauman19bac1e2014-05-06 15:23:49 -04002677 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002678 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002679 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002680#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002681 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002682#else
2683 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2684#endif
John Bauman89401822014-05-06 15:04:28 -04002685 }
2686
John Bauman19bac1e2014-05-06 15:23:49 -04002687 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002688 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002689 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002690 }
2691
John Bauman19bac1e2014-05-06 15:23:49 -04002692 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002693 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002694 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002695#if defined(__i386__) || defined(__x86_64__)
2696 return x86::psllw(lhs, rhs);
2697#else
2698 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2699#endif
John Bauman89401822014-05-06 15:04:28 -04002700 }
2701
John Bauman19bac1e2014-05-06 15:23:49 -04002702 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002703 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002704 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002705#if defined(__i386__) || defined(__x86_64__)
2706 return x86::psraw(lhs, rhs);
2707#else
2708 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2709#endif
John Bauman89401822014-05-06 15:04:28 -04002710 }
2711
John Bauman19bac1e2014-05-06 15:23:49 -04002712 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002713 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002714 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002715#if defined(__i386__) || defined(__x86_64__)
2716 return x86::pmaddwd(x, y);
2717#else
2718 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2719#endif
John Bauman89401822014-05-06 15:04:28 -04002720 }
2721
John Bauman19bac1e2014-05-06 15:23:49 -04002722 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002723 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002724 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002725#if defined(__i386__) || defined(__x86_64__)
2726 return x86::pmulhw(x, y);
2727#else
2728 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2729#endif
John Bauman89401822014-05-06 15:04:28 -04002730 }
2731
John Bauman19bac1e2014-05-06 15:23:49 -04002732 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002733 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002734 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002735 }
2736
John Bauman19bac1e2014-05-06 15:23:49 -04002737 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002738 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002739 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002740#if defined(__i386__) || defined(__x86_64__)
2741 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2742#else
2743 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2744#endif
John Bauman89401822014-05-06 15:04:28 -04002745 }
2746
John Bauman19bac1e2014-05-06 15:23:49 -04002747 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002748 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002749 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002750#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002751 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002752#else
2753 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2754#endif
John Bauman89401822014-05-06 15:04:28 -04002755 }
2756
John Bauman19bac1e2014-05-06 15:23:49 -04002757 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002758 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002759 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense89cd582016-09-30 14:23:47 -04002760 int pshufb[16] =
2761 {
2762 select0 + 0,
2763 select0 + 1,
2764 select1 + 0,
2765 select1 + 1,
2766 select2 + 0,
2767 select2 + 1,
2768 select3 + 0,
2769 select3 + 1,
2770 select4 + 0,
2771 select4 + 1,
2772 select5 + 0,
2773 select5 + 1,
2774 select6 + 0,
2775 select6 + 1,
2776 select7 + 0,
2777 select7 + 1,
2778 };
John Bauman89401822014-05-06 15:04:28 -04002779
2780 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002781 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002782 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2783
2784 return RValue<UShort8>(short8);
2785 }
2786
John Bauman19bac1e2014-05-06 15:23:49 -04002787 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002788 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002789 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002790#if defined(__i386__) || defined(__x86_64__)
2791 return x86::pmulhuw(x, y);
2792#else
2793 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2794#endif
John Bauman89401822014-05-06 15:04:28 -04002795 }
2796
John Bauman19bac1e2014-05-06 15:23:49 -04002797 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002798 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002799 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002800 }
2801
Nicolas Capens96d4e092016-11-18 14:22:38 -05002802 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002803 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002804 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002805 RValue<Int> res = val;
2806
Logan Chien191b3052018-08-31 16:57:15 +08002807 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002808 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002809
2810 return res;
2811 }
2812
Nicolas Capens96d4e092016-11-18 14:22:38 -05002813 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002814 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002815 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002816 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002817 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002818
2819 return val;
2820 }
2821
Nicolas Capens96d4e092016-11-18 14:22:38 -05002822 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002823 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002824 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002825 RValue<Int> res = val;
2826
Logan Chien191b3052018-08-31 16:57:15 +08002827 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002828 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002829
2830 return res;
2831 }
2832
Nicolas Capens96d4e092016-11-18 14:22:38 -05002833 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002834 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002835 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002836 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002837 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002838
2839 return val;
2840 }
2841
John Bauman19bac1e2014-05-06 15:23:49 -04002842 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002843 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002844 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002845#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002846 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002847#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002848 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002849#endif
John Bauman89401822014-05-06 15:04:28 -04002850 }
2851
John Bauman19bac1e2014-05-06 15:23:49 -04002852 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002853 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002854 return T(llvm::Type::getInt32Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002855 }
2856
John Bauman19bac1e2014-05-06 15:23:49 -04002857 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002858 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002859 return T(llvm::Type::getInt64Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002860 }
2861
John Bauman19bac1e2014-05-06 15:23:49 -04002862 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002863 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002864 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002865 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2866 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002867
Alexis Hetu764d1422016-09-28 08:44:22 -04002868 // Smallest positive value representable in UInt, but not in Int
2869 const unsigned int ustart = 0x80000000u;
2870 const float ustartf = float(ustart);
2871
2872 // If the value is negative, store 0, otherwise store the result of the conversion
2873 storeValue((~(As<Int>(cast) >> 31) &
2874 // Check if the value can be represented as an Int
2875 IfThenElse(cast >= ustartf,
2876 // If the value is too large, subtract ustart and re-add it after conversion.
2877 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2878 // Otherwise, just convert normally
2879 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002880 }
2881
Nicolas Capens96d4e092016-11-18 14:22:38 -05002882 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002883 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002884 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002885 RValue<UInt> res = val;
2886
Logan Chien191b3052018-08-31 16:57:15 +08002887 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002888 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002889
2890 return res;
2891 }
2892
Nicolas Capens96d4e092016-11-18 14:22:38 -05002893 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002894 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002895 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002896 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002897 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002898
2899 return val;
2900 }
2901
Nicolas Capens96d4e092016-11-18 14:22:38 -05002902 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002903 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002904 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002905 RValue<UInt> res = val;
2906
Logan Chien191b3052018-08-31 16:57:15 +08002907 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002908 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002909
2910 return res;
2911 }
2912
Nicolas Capens96d4e092016-11-18 14:22:38 -05002913 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002914 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002915 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002916 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002917 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002918
2919 return val;
2920 }
2921
John Bauman19bac1e2014-05-06 15:23:49 -04002922// RValue<UInt> RoundUInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002923// {
Logan Chiene3191012018-08-24 22:01:50 +08002924//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002925// return x86::cvtss2si(val); // FIXME: Unsigned
Logan Chiene3191012018-08-24 22:01:50 +08002926//#else
2927// return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
2928//#endif
John Bauman89401822014-05-06 15:04:28 -04002929// }
2930
John Bauman19bac1e2014-05-06 15:23:49 -04002931 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002932 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002933 return T(llvm::Type::getInt32Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002934 }
2935
John Bauman19bac1e2014-05-06 15:23:49 -04002936// Int2::Int2(RValue<Int> cast)
2937// {
John Bauman19bac1e2014-05-06 15:23:49 -04002938// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2939// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
John Bauman66b8ab22014-05-06 15:57:45 -04002940//
Nicolas Capense89cd582016-09-30 14:23:47 -04002941// int shuffle[2] = {0, 0};
2942// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
John Bauman19bac1e2014-05-06 15:23:49 -04002943//
John Bauman66b8ab22014-05-06 15:57:45 -04002944// storeValue(replicate);
John Bauman19bac1e2014-05-06 15:23:49 -04002945// }
John Bauman89401822014-05-06 15:04:28 -04002946
John Bauman19bac1e2014-05-06 15:23:49 -04002947 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002948 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002949 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002950#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002951 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2952
2953 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002954#else
2955 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2956#endif
John Bauman89401822014-05-06 15:04:28 -04002957 }
2958
John Bauman19bac1e2014-05-06 15:23:49 -04002959 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002960 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002961 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002962#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002963 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2964
2965 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002966#else
2967 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2968#endif
John Bauman89401822014-05-06 15:04:28 -04002969 }
2970
John Bauman19bac1e2014-05-06 15:23:49 -04002971 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002972 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002973 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002974 }
2975
John Bauman19bac1e2014-05-06 15:23:49 -04002976 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002977 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002978 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002979#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002980 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2981
2982 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002983#else
2984 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2985#endif
John Bauman89401822014-05-06 15:04:28 -04002986 }
2987
John Bauman19bac1e2014-05-06 15:23:49 -04002988 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002989 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002990 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002991#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002992 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2993
2994 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002995#else
2996 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
2997#endif
John Bauman89401822014-05-06 15:04:28 -04002998 }
2999
John Bauman19bac1e2014-05-06 15:23:49 -04003000 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04003001 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003002 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04003003 }
3004
Nicolas Capenscb986762017-01-20 11:34:37 -05003005 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003006 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003007 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003008#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04003009 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003010 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003011 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003012 }
3013 else
Logan Chiene3191012018-08-24 22:01:50 +08003014#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003015 {
Nicolas Capense89cd582016-09-30 14:23:47 -04003016 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04003017 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08003018 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003019
Nicolas Capense89cd582016-09-30 14:23:47 -04003020 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04003021 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08003022 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003023
Nicolas Capens01a97962017-07-28 17:30:51 -04003024 *this = As<Int4>(d);
3025 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003026 }
3027
Nicolas Capenscb986762017-01-20 11:34:37 -05003028 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003029 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003030 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003031#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04003032 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003033 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003034 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003035 }
3036 else
Logan Chiene3191012018-08-24 22:01:50 +08003037#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003038 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003039 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
3040 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
3041 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003042
Nicolas Capense89cd582016-09-30 14:23:47 -04003043 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04003044 Value *c = Nucleus::createBitCast(b, Short8::getType());
3045 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003046
Nicolas Capens01a97962017-07-28 17:30:51 -04003047 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003048 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003049 }
3050
Nicolas Capenscb986762017-01-20 11:34:37 -05003051 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003052 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003053 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003054#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003055 if(CPUID::supportsSSE4_1())
3056 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003057 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003058 }
3059 else
Logan Chiene3191012018-08-24 22:01:50 +08003060#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003061 {
Nicolas Capense89cd582016-09-30 14:23:47 -04003062 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04003063 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3064 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003065 }
3066 }
3067
Nicolas Capenscb986762017-01-20 11:34:37 -05003068 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003069 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003070 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003071#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003072 if(CPUID::supportsSSE4_1())
3073 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003074 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003075 }
3076 else
Logan Chiene3191012018-08-24 22:01:50 +08003077#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003078 {
Nicolas Capense89cd582016-09-30 14:23:47 -04003079 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04003080 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
3081 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003082 }
3083 }
3084
Nicolas Capenscb986762017-01-20 11:34:37 -05003085 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04003086 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003087 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens24c8cf02016-08-15 15:33:14 -04003088 Value *vector = loadValue();
3089 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3090
Nicolas Capense89cd582016-09-30 14:23:47 -04003091 int swizzle[4] = {0, 0, 0, 0};
3092 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04003093
3094 storeValue(replicate);
3095 }
3096
John Bauman19bac1e2014-05-06 15:23:49 -04003097 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003098 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003099 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003100#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003101 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08003102#else
3103 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
3104#endif
John Bauman89401822014-05-06 15:04:28 -04003105 }
3106
John Bauman19bac1e2014-05-06 15:23:49 -04003107 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003108 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003109 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003110#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003111 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08003112#else
3113 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
3114#endif
John Bauman89401822014-05-06 15:04:28 -04003115 }
3116
John Bauman19bac1e2014-05-06 15:23:49 -04003117 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3118 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003119 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003120 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003121 }
3122
3123 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3124 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003125 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003126 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003127 }
3128
3129 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3130 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003131 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003132 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003133 }
3134
3135 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3136 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003137 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003138 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003139 }
3140
3141 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3142 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003143 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003144 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003145 }
3146
3147 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3148 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003149 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003150 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003151 }
3152
3153 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3154 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003155 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003156#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003157 if(CPUID::supportsSSE4_1())
3158 {
3159 return x86::pmaxsd(x, y);
3160 }
3161 else
Logan Chiene3191012018-08-24 22:01:50 +08003162#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003163 {
3164 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003165 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04003166 }
3167 }
3168
3169 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3170 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003171 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003172#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003173 if(CPUID::supportsSSE4_1())
3174 {
3175 return x86::pminsd(x, y);
3176 }
3177 else
Logan Chiene3191012018-08-24 22:01:50 +08003178#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003179 {
3180 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003181 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04003182 }
3183 }
3184
3185 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04003186 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003187 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003188#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003189 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08003190#else
Logan Chien2faa24a2018-09-26 19:59:32 +08003191 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08003192#endif
John Bauman89401822014-05-06 15:04:28 -04003193 }
3194
Chris Forbese86b6dc2019-03-01 09:08:47 -08003195 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
3196 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003197 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08003198 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3199 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
3200 }
3201
3202 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3203 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003204 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08003205 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3206 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
3207 }
3208
Nicolas Capens33438a62017-09-27 11:47:35 -04003209 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003210 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003211 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003212#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003213 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003214#else
3215 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
3216#endif
John Bauman89401822014-05-06 15:04:28 -04003217 }
3218
Nicolas Capens33438a62017-09-27 11:47:35 -04003219 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3220 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003221 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003222#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04003223 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003224#else
3225 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
3226#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04003227 }
3228
John Bauman19bac1e2014-05-06 15:23:49 -04003229 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003230 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003231 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003232#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003233 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08003234#else
3235 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
3236#endif
John Bauman89401822014-05-06 15:04:28 -04003237 }
3238
John Bauman19bac1e2014-05-06 15:23:49 -04003239 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04003240 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003241 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003242 }
3243
Nicolas Capenscb986762017-01-20 11:34:37 -05003244 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04003245 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003246 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04003247 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
3248 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04003249
Alexis Hetu764d1422016-09-28 08:44:22 -04003250 // Smallest positive value representable in UInt, but not in Int
3251 const unsigned int ustart = 0x80000000u;
3252 const float ustartf = float(ustart);
3253
3254 // Check if the value can be represented as an Int
3255 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3256 // If the value is too large, subtract ustart and re-add it after conversion.
3257 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
3258 // Otherwise, just convert normally
3259 (~uiValue & Int4(cast));
3260 // If the value is negative, store 0, otherwise store the result of the conversion
3261 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04003262 }
3263
Ben Clayton88816fa2019-05-15 17:08:14 +01003264 UInt4::UInt4(RValue<UInt> rhs) : XYZW(this)
3265 {
3266 RR_DEBUG_INFO_UPDATE_LOC();
3267 Value *vector = loadValue();
3268 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3269
3270 int swizzle[4] = {0, 0, 0, 0};
3271 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
3272
3273 storeValue(replicate);
3274 }
3275
John Bauman19bac1e2014-05-06 15:23:49 -04003276 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003277 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003278 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003279#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003280 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08003281#else
3282 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
3283#endif
John Bauman89401822014-05-06 15:04:28 -04003284 }
3285
John Bauman19bac1e2014-05-06 15:23:49 -04003286 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003287 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003288 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003289#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003290 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08003291#else
3292 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
3293#endif
John Bauman89401822014-05-06 15:04:28 -04003294 }
3295
John Bauman19bac1e2014-05-06 15:23:49 -04003296 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3297 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003298 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003299 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003300 }
3301
3302 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3303 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003304 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003305 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
3306 }
3307
3308 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3309 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003310 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003311 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003312 }
3313
3314 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3315 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003316 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003317 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
3318 }
3319
3320 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3321 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003322 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003323 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003324 }
3325
3326 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3327 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003328 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003329 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
3330 }
3331
3332 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3333 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003334 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003335#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003336 if(CPUID::supportsSSE4_1())
3337 {
3338 return x86::pmaxud(x, y);
3339 }
3340 else
Logan Chiene3191012018-08-24 22:01:50 +08003341#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003342 {
3343 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003344 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04003345 }
3346 }
3347
3348 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3349 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003350 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003351#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003352 if(CPUID::supportsSSE4_1())
3353 {
3354 return x86::pminud(x, y);
3355 }
3356 else
Logan Chiene3191012018-08-24 22:01:50 +08003357#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003358 {
3359 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003360 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04003361 }
3362 }
3363
John Bauman19bac1e2014-05-06 15:23:49 -04003364 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04003365 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003366 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003367 }
3368
Alexis Hetu734e2572018-12-20 14:00:49 -05003369 Type *Half::getType()
3370 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003371 return T(llvm::Type::getInt16Ty(jit->context));
Alexis Hetu734e2572018-12-20 14:00:49 -05003372 }
3373
Nicolas Capens05b3d662016-02-25 23:58:33 -05003374 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003375 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003376 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003377#if defined(__i386__) || defined(__x86_64__)
3378 if(exactAtPow2)
3379 {
3380 // rcpss uses a piecewise-linear approximation which minimizes the relative error
3381 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3382 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3383 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003384 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003385#else
3386 return As<Float>(V(lowerRCP(V(x.value))));
3387#endif
John Bauman89401822014-05-06 15:04:28 -04003388 }
John Bauman66b8ab22014-05-06 15:57:45 -04003389
John Bauman19bac1e2014-05-06 15:23:49 -04003390 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003391 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003392 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003393#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003394 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003395#else
3396 return As<Float>(V(lowerRSQRT(V(x.value))));
3397#endif
John Bauman89401822014-05-06 15:04:28 -04003398 }
3399
John Bauman19bac1e2014-05-06 15:23:49 -04003400 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003401 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003402 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003403#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003404 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003405#else
3406 return As<Float>(V(lowerSQRT(V(x.value))));
3407#endif
John Bauman89401822014-05-06 15:04:28 -04003408 }
3409
John Bauman19bac1e2014-05-06 15:23:49 -04003410 RValue<Float> Round(RValue<Float> x)
3411 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003412 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003413#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003414 if(CPUID::supportsSSE4_1())
3415 {
3416 return x86::roundss(x, 0);
3417 }
3418 else
3419 {
3420 return Float4(Round(Float4(x))).x;
3421 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003422#else
3423 return RValue<Float>(V(lowerRound(V(x.value))));
3424#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003425 }
3426
3427 RValue<Float> Trunc(RValue<Float> x)
3428 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003429 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003430#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003431 if(CPUID::supportsSSE4_1())
3432 {
3433 return x86::roundss(x, 3);
3434 }
3435 else
3436 {
3437 return Float(Int(x)); // Rounded toward zero
3438 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003439#else
3440 return RValue<Float>(V(lowerTrunc(V(x.value))));
3441#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003442 }
3443
3444 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003445 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003446 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003447#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003448 if(CPUID::supportsSSE4_1())
3449 {
3450 return x - x86::floorss(x);
3451 }
3452 else
3453 {
John Bauman19bac1e2014-05-06 15:23:49 -04003454 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04003455 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003456#else
3457 // x - floor(x) can be 1.0 for very small negative x.
3458 // Clamp against the value just below 1.0.
3459 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
3460#endif
John Bauman89401822014-05-06 15:04:28 -04003461 }
3462
John Bauman19bac1e2014-05-06 15:23:49 -04003463 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003464 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003465 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003466#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003467 if(CPUID::supportsSSE4_1())
3468 {
3469 return x86::floorss(x);
3470 }
3471 else
3472 {
3473 return Float4(Floor(Float4(x))).x;
3474 }
Logan Chien40a60052018-09-26 19:03:53 +08003475#else
3476 return RValue<Float>(V(lowerFloor(V(x.value))));
3477#endif
John Bauman89401822014-05-06 15:04:28 -04003478 }
3479
John Bauman19bac1e2014-05-06 15:23:49 -04003480 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003481 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003482 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003483#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003484 if(CPUID::supportsSSE4_1())
3485 {
3486 return x86::ceilss(x);
3487 }
3488 else
Logan Chiene3191012018-08-24 22:01:50 +08003489#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003490 {
3491 return Float4(Ceil(Float4(x))).x;
3492 }
John Bauman89401822014-05-06 15:04:28 -04003493 }
3494
John Bauman19bac1e2014-05-06 15:23:49 -04003495 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04003496 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003497 return T(llvm::Type::getFloatTy(jit->context));
John Bauman89401822014-05-06 15:04:28 -04003498 }
3499
John Bauman19bac1e2014-05-06 15:23:49 -04003500 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04003501 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003502 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04003503 }
3504
Nicolas Capenscb986762017-01-20 11:34:37 -05003505 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04003506 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003507 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04003508 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04003509 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3510
Nicolas Capense89cd582016-09-30 14:23:47 -04003511 int swizzle[4] = {0, 0, 0, 0};
3512 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04003513
John Bauman66b8ab22014-05-06 15:57:45 -04003514 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04003515 }
3516
John Bauman19bac1e2014-05-06 15:23:49 -04003517 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003518 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003519 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003520#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003521 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003522#else
3523 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
3524#endif
John Bauman89401822014-05-06 15:04:28 -04003525 }
3526
John Bauman19bac1e2014-05-06 15:23:49 -04003527 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003528 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003529 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003530#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003531 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003532#else
3533 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
3534#endif
John Bauman89401822014-05-06 15:04:28 -04003535 }
3536
Nicolas Capens05b3d662016-02-25 23:58:33 -05003537 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003538 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003539 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003540#if defined(__i386__) || defined(__x86_64__)
3541 if(exactAtPow2)
3542 {
3543 // rcpps uses a piecewise-linear approximation which minimizes the relative error
3544 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3545 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3546 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003547 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003548#else
3549 return As<Float4>(V(lowerRCP(V(x.value))));
3550#endif
John Bauman89401822014-05-06 15:04:28 -04003551 }
John Bauman66b8ab22014-05-06 15:57:45 -04003552
John Bauman19bac1e2014-05-06 15:23:49 -04003553 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003554 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003555 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003556#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003557 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003558#else
3559 return As<Float4>(V(lowerRSQRT(V(x.value))));
3560#endif
John Bauman89401822014-05-06 15:04:28 -04003561 }
3562
John Bauman19bac1e2014-05-06 15:23:49 -04003563 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003564 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003565 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003566#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003567 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003568#else
3569 return As<Float4>(V(lowerSQRT(V(x.value))));
3570#endif
John Bauman89401822014-05-06 15:04:28 -04003571 }
3572
John Bauman19bac1e2014-05-06 15:23:49 -04003573 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003574 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003575 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003576#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003577 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003578#else
3579 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
3580#endif
John Bauman89401822014-05-06 15:04:28 -04003581 }
3582
John Bauman19bac1e2014-05-06 15:23:49 -04003583 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003584 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003585 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003586 // return As<Int4>(x86::cmpeqps(x, y));
3587 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
3588 }
3589
John Bauman19bac1e2014-05-06 15:23:49 -04003590 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003591 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003592 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003593 // return As<Int4>(x86::cmpltps(x, y));
3594 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
3595 }
3596
John Bauman19bac1e2014-05-06 15:23:49 -04003597 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003598 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003599 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003600 // return As<Int4>(x86::cmpleps(x, y));
3601 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
3602 }
3603
John Bauman19bac1e2014-05-06 15:23:49 -04003604 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003605 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003606 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003607 // return As<Int4>(x86::cmpneqps(x, y));
3608 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
3609 }
3610
John Bauman19bac1e2014-05-06 15:23:49 -04003611 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003612 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003613 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003614 // return As<Int4>(x86::cmpnltps(x, y));
3615 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
3616 }
3617
John Bauman19bac1e2014-05-06 15:23:49 -04003618 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003619 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003620 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003621 // return As<Int4>(x86::cmpnleps(x, y));
3622 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
3623 }
3624
Ben Claytonec1aeb82019-03-04 19:33:27 +00003625 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3626 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003627 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003628 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
3629 }
3630
3631 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3632 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003633 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003634 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
3635 }
3636
3637 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3638 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003639 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003640 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
3641 }
3642
3643 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3644 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003645 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003646 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
3647 }
3648
3649 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3650 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003651 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003652 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
3653 }
3654
3655 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3656 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003657 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003658 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
3659 }
3660
John Bauman19bac1e2014-05-06 15:23:49 -04003661 RValue<Float4> Round(RValue<Float4> x)
3662 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003663 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003664#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003665 if(CPUID::supportsSSE4_1())
3666 {
3667 return x86::roundps(x, 0);
3668 }
3669 else
3670 {
3671 return Float4(RoundInt(x));
3672 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003673#else
3674 return RValue<Float4>(V(lowerRound(V(x.value))));
3675#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003676 }
3677
3678 RValue<Float4> Trunc(RValue<Float4> x)
3679 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003680 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003681#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003682 if(CPUID::supportsSSE4_1())
3683 {
3684 return x86::roundps(x, 3);
3685 }
3686 else
3687 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003688 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003689 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003690#else
3691 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3692#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003693 }
3694
3695 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003696 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003697 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb9230422017-07-17 10:27:33 -04003698 Float4 frc;
3699
Logan Chien40a60052018-09-26 19:03:53 +08003700#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003701 if(CPUID::supportsSSE4_1())
3702 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003703 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003704 }
3705 else
3706 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003707 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003708
Nicolas Capensb9230422017-07-17 10:27:33 -04003709 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003710 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003711#else
3712 frc = x - Floor(x);
3713#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003714
3715 // x - floor(x) can be 1.0 for very small negative x.
3716 // Clamp against the value just below 1.0.
3717 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003718 }
3719
John Bauman19bac1e2014-05-06 15:23:49 -04003720 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003721 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003722 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003723#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003724 if(CPUID::supportsSSE4_1())
3725 {
3726 return x86::floorps(x);
3727 }
3728 else
3729 {
John Bauman19bac1e2014-05-06 15:23:49 -04003730 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003731 }
Logan Chien40a60052018-09-26 19:03:53 +08003732#else
3733 return RValue<Float4>(V(lowerFloor(V(x.value))));
3734#endif
John Bauman89401822014-05-06 15:04:28 -04003735 }
3736
John Bauman19bac1e2014-05-06 15:23:49 -04003737 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003738 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003739 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003740#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003741 if(CPUID::supportsSSE4_1())
3742 {
3743 return x86::ceilps(x);
3744 }
3745 else
Logan Chiene3191012018-08-24 22:01:50 +08003746#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003747 {
3748 return -Floor(-x);
3749 }
John Bauman89401822014-05-06 15:04:28 -04003750 }
3751
Ben Claytona2c8b772019-04-09 13:42:36 -04003752 RValue<Float4> Sin(RValue<Float4> v)
3753 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003754 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::sin, { V(v.value)->getType() } );
3755 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Claytona2c8b772019-04-09 13:42:36 -04003756 }
3757
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003758 RValue<Float4> Cos(RValue<Float4> v)
3759 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003760 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::cos, { V(v.value)->getType() } );
3761 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003762 }
3763
Ben Clayton14740062019-04-09 13:48:41 -04003764 RValue<Float4> Tan(RValue<Float4> v)
3765 {
3766 return Sin(v) / Cos(v);
3767 }
3768
Ben Claytoneafae472019-04-09 14:22:38 -04003769 static RValue<Float4> TransformFloat4PerElement(RValue<Float4> v, const char* name)
Ben Claytonf9350d72019-04-09 14:19:02 -04003770 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003771 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), ::llvm::ArrayRef<llvm::Type*>(T(Float::getType())), false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01003772 auto func = jit->module->getOrInsertFunction(name, funcTy);
Ben Claytonf9350d72019-04-09 14:19:02 -04003773 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3774 for (uint64_t i = 0; i < 4; i++)
3775 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003776 auto el = jit->builder->CreateCall(func, V(Nucleus::createExtractElement(v.value, Float::getType(), i)));
Ben Claytonc38fc122019-04-11 08:58:49 -04003777 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytonf9350d72019-04-09 14:19:02 -04003778 }
3779 return RValue<Float4>(V(out));
3780 }
3781
Ben Claytoneafae472019-04-09 14:22:38 -04003782 RValue<Float4> Asin(RValue<Float4> v)
3783 {
3784 return TransformFloat4PerElement(v, "asinf");
3785 }
3786
3787 RValue<Float4> Acos(RValue<Float4> v)
3788 {
3789 return TransformFloat4PerElement(v, "acosf");
3790 }
3791
Ben Clayton749b4e02019-04-09 14:27:43 -04003792 RValue<Float4> Atan(RValue<Float4> v)
3793 {
3794 return TransformFloat4PerElement(v, "atanf");
3795 }
3796
Ben Claytond9636972019-04-09 15:09:54 -04003797 RValue<Float4> Sinh(RValue<Float4> v)
3798 {
3799 return TransformFloat4PerElement(v, "sinhf");
3800 }
3801
Ben Clayton900ea2c2019-04-09 15:25:36 -04003802 RValue<Float4> Cosh(RValue<Float4> v)
3803 {
3804 return TransformFloat4PerElement(v, "coshf");
3805 }
3806
Ben Clayton3928bd92019-04-09 15:27:41 -04003807 RValue<Float4> Tanh(RValue<Float4> v)
3808 {
3809 return TransformFloat4PerElement(v, "tanhf");
3810 }
3811
Ben Claytonf6d77ab2019-04-09 15:30:04 -04003812 RValue<Float4> Asinh(RValue<Float4> v)
3813 {
3814 return TransformFloat4PerElement(v, "asinhf");
3815 }
3816
Ben Clayton28ebcb02019-04-09 15:33:38 -04003817 RValue<Float4> Acosh(RValue<Float4> v)
3818 {
3819 return TransformFloat4PerElement(v, "acoshf");
3820 }
3821
Ben Claytonfa6a5392019-04-09 15:35:24 -04003822 RValue<Float4> Atanh(RValue<Float4> v)
3823 {
3824 return TransformFloat4PerElement(v, "atanhf");
3825 }
3826
Ben Claytona520c3e2019-04-09 15:43:45 -04003827 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3828 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003829 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3830 paramTys.push_back(T(Float::getType()));
3831 paramTys.push_back(T(Float::getType()));
3832 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), paramTys, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01003833 auto func = jit->module->getOrInsertFunction("atan2f", funcTy);
Ben Claytona520c3e2019-04-09 15:43:45 -04003834 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3835 for (uint64_t i = 0; i < 4; i++)
3836 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003837 auto el = jit->builder->CreateCall2(func, ARGS(
Ben Claytonc38fc122019-04-11 08:58:49 -04003838 V(Nucleus::createExtractElement(x.value, Float::getType(), i)),
3839 V(Nucleus::createExtractElement(y.value, Float::getType(), i))
3840 ));
3841 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytona520c3e2019-04-09 15:43:45 -04003842 }
3843 return RValue<Float4>(V(out));
3844 }
3845
Ben Claytonbfe94f02019-04-09 15:52:12 -04003846 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3847 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003848 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::pow, { T(Float4::getType()) });
3849 return RValue<Float4>(V(jit->builder->CreateCall2(func, ARGS(V(x.value), V(y.value)))));
Ben Claytonbfe94f02019-04-09 15:52:12 -04003850 }
3851
Ben Clayton242f0022019-04-09 16:00:53 -04003852 RValue<Float4> Exp(RValue<Float4> v)
3853 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003854 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::exp, { T(Float4::getType()) } );
3855 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Clayton242f0022019-04-09 16:00:53 -04003856 }
3857
Ben Clayton2c1da722019-04-09 16:03:03 -04003858 RValue<Float4> Log(RValue<Float4> v)
3859 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003860 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::log, { T(Float4::getType()) } );
3861 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Clayton2c1da722019-04-09 16:03:03 -04003862 }
3863
Ben Claytonf40b56c2019-04-09 16:06:55 -04003864 RValue<Float4> Exp2(RValue<Float4> v)
3865 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003866 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::exp2, { T(Float4::getType()) } );
3867 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Claytonf40b56c2019-04-09 16:06:55 -04003868 }
3869
Ben Claytone17acfe2019-04-09 16:09:13 -04003870 RValue<Float4> Log2(RValue<Float4> v)
3871 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003872 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::log2, { T(Float4::getType()) } );
3873 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Claytone17acfe2019-04-09 16:09:13 -04003874 }
3875
Ben Clayton60958262019-04-10 14:53:30 -04003876 RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
3877 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003878 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::ctlz, { T(UInt4::getType()) } );
3879 return RValue<UInt4>(V(jit->builder->CreateCall2(func, ARGS(
Ben Clayton60958262019-04-10 14:53:30 -04003880 V(v.value),
Ben Clayton6f8e5652019-06-29 01:58:02 +01003881 isZeroUndef ? ::llvm::ConstantInt::getTrue(jit->context) : ::llvm::ConstantInt::getFalse(jit->context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003882 ))));
Ben Clayton60958262019-04-10 14:53:30 -04003883 }
3884
Ben Clayton3f007c42019-04-10 14:54:23 -04003885 RValue<UInt4> Cttz(RValue<UInt4> v, bool isZeroUndef)
3886 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003887 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::cttz, { T(UInt4::getType()) } );
3888 return RValue<UInt4>(V(jit->builder->CreateCall2(func, ARGS(
Ben Clayton3f007c42019-04-10 14:54:23 -04003889 V(v.value),
Ben Clayton6f8e5652019-06-29 01:58:02 +01003890 isZeroUndef ? ::llvm::ConstantInt::getTrue(jit->context) : ::llvm::ConstantInt::getFalse(jit->context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003891 ))));
Ben Clayton3f007c42019-04-10 14:54:23 -04003892 }
3893
John Bauman19bac1e2014-05-06 15:23:49 -04003894 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003895 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003896 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003897 }
3898
John Bauman89401822014-05-06 15:04:28 -04003899 RValue<Long> Ticks()
3900 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003901 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01003902 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003903
Ben Clayton6f8e5652019-06-29 01:58:02 +01003904 return RValue<Long>(V(jit->builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003905 }
Ben Claytond853c122019-04-16 17:51:49 -04003906
3907 RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3908 {
3909 // Note: this should work for 32-bit pointers as well because 'inttoptr'
3910 // is defined to truncate (and zero extend) if necessary.
Ben Clayton6f8e5652019-06-29 01:58:02 +01003911 auto ptrAsInt = ::llvm::ConstantInt::get(::llvm::Type::getInt64Ty(jit->context), reinterpret_cast<uintptr_t>(ptr));
3912 return RValue<Pointer<Byte>>(V(jit->builder->CreateIntToPtr(ptrAsInt, T(Pointer<Byte>::getType()))));
Ben Claytond853c122019-04-16 17:51:49 -04003913 }
3914
3915 Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3916 {
3917 ::llvm::SmallVector<::llvm::Type*, 8> paramTys;
3918 for (auto ty : argTys) { paramTys.push_back(T(ty)); }
3919 auto funcTy = ::llvm::FunctionType::get(T(retTy), paramTys, false);
3920
3921 auto funcPtrTy = funcTy->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01003922 auto funcPtr = jit->builder->CreatePointerCast(V(fptr.value), funcPtrTy);
Ben Claytond853c122019-04-16 17:51:49 -04003923
3924 ::llvm::SmallVector<::llvm::Value*, 8> arguments;
3925 for (auto arg : args) { arguments.push_back(V(arg)); }
Ben Clayton6f8e5652019-06-29 01:58:02 +01003926 return V(jit->builder->CreateCall(funcPtr, arguments));
Ben Claytond853c122019-04-16 17:51:49 -04003927 }
Nicolas Capens9770a462019-06-25 10:47:10 -04003928
3929 void Breakpoint()
3930 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003931 llvm::Function *debugtrap = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::debugtrap);
Nicolas Capens9770a462019-06-25 10:47:10 -04003932
Ben Clayton6f8e5652019-06-29 01:58:02 +01003933 jit->builder->CreateCall(debugtrap);
Nicolas Capens9770a462019-06-25 10:47:10 -04003934 }
John Bauman89401822014-05-06 15:04:28 -04003935}
3936
Nicolas Capens48461502018-08-06 14:20:45 -04003937namespace rr
John Bauman89401822014-05-06 15:04:28 -04003938{
Logan Chiene3191012018-08-24 22:01:50 +08003939#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003940 namespace x86
3941 {
John Bauman19bac1e2014-05-06 15:23:49 -04003942 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003943 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003944 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003945
John Bauman89401822014-05-06 15:04:28 -04003946 Float4 vector;
3947 vector.x = val;
3948
Ben Clayton6f8e5652019-06-29 01:58:02 +01003949 return RValue<Int>(V(jit->builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003950 }
3951
John Bauman19bac1e2014-05-06 15:23:49 -04003952 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003953 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003954 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003955
Ben Clayton6f8e5652019-06-29 01:58:02 +01003956 return RValue<Int4>(V(jit->builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003957 }
3958
John Bauman19bac1e2014-05-06 15:23:49 -04003959 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003960 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003961 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003962
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003963 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003964
Ben Clayton6f8e5652019-06-29 01:58:02 +01003965 return RValue<Float>(Nucleus::createExtractElement(V(jit->builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003966 }
3967
John Bauman19bac1e2014-05-06 15:23:49 -04003968 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003969 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003970 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3971 return RValue<Float>(V(jit->builder->CreateCall(sqrt, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003972 }
3973
John Bauman19bac1e2014-05-06 15:23:49 -04003974 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003975 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003976 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003977
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003978 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003979
Ben Clayton6f8e5652019-06-29 01:58:02 +01003980 return RValue<Float>(Nucleus::createExtractElement(V(jit->builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003981 }
3982
John Bauman19bac1e2014-05-06 15:23:49 -04003983 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003984 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003985 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003986
Ben Clayton6f8e5652019-06-29 01:58:02 +01003987 return RValue<Float4>(V(jit->builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003988 }
3989
John Bauman19bac1e2014-05-06 15:23:49 -04003990 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003991 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003992 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::sqrt, {V(val.value)->getType()});
John Bauman66b8ab22014-05-06 15:57:45 -04003993
Ben Clayton6f8e5652019-06-29 01:58:02 +01003994 return RValue<Float4>(V(jit->builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003995 }
3996
John Bauman19bac1e2014-05-06 15:23:49 -04003997 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003998 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003999 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04004000
Ben Clayton6f8e5652019-06-29 01:58:02 +01004001 return RValue<Float4>(V(jit->builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04004002 }
4003
John Bauman19bac1e2014-05-06 15:23:49 -04004004 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04004005 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004006 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04004007
Ben Clayton6f8e5652019-06-29 01:58:02 +01004008 return RValue<Float4>(V(jit->builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004009 }
4010
John Bauman19bac1e2014-05-06 15:23:49 -04004011 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04004012 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004013 llvm::Function *minps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04004014
Ben Clayton6f8e5652019-06-29 01:58:02 +01004015 return RValue<Float4>(V(jit->builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004016 }
4017
John Bauman19bac1e2014-05-06 15:23:49 -04004018 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04004019 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004020 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04004021
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004022 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04004023 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
4024
Ben Clayton6f8e5652019-06-29 01:58:02 +01004025 return RValue<Float>(Nucleus::createExtractElement(V(jit->builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04004026 }
4027
John Bauman19bac1e2014-05-06 15:23:49 -04004028 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04004029 {
4030 return roundss(val, 1);
4031 }
4032
John Bauman19bac1e2014-05-06 15:23:49 -04004033 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04004034 {
4035 return roundss(val, 2);
4036 }
4037
John Bauman19bac1e2014-05-06 15:23:49 -04004038 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04004039 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004040 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04004041
Ben Clayton6f8e5652019-06-29 01:58:02 +01004042 return RValue<Float4>(V(jit->builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04004043 }
4044
John Bauman19bac1e2014-05-06 15:23:49 -04004045 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04004046 {
4047 return roundps(val, 1);
4048 }
4049
John Bauman19bac1e2014-05-06 15:23:49 -04004050 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04004051 {
4052 return roundps(val, 2);
4053 }
4054
Alexis Hetu0f448072016-03-18 10:56:08 -04004055 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04004056 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004057 return RValue<Int4>(V(lowerPABS(V(x.value))));
John Bauman89401822014-05-06 15:04:28 -04004058 }
4059
John Bauman19bac1e2014-05-06 15:23:49 -04004060 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004061 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004062 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04004063
Ben Clayton6f8e5652019-06-29 01:58:02 +01004064 return As<Short4>(V(jit->builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004065 }
John Bauman66b8ab22014-05-06 15:57:45 -04004066
John Bauman19bac1e2014-05-06 15:23:49 -04004067 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004068 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004069 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04004070
Ben Clayton6f8e5652019-06-29 01:58:02 +01004071 return As<Short4>(V(jit->builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004072 }
4073
John Bauman19bac1e2014-05-06 15:23:49 -04004074 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004075 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004076 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04004077
Ben Clayton6f8e5652019-06-29 01:58:02 +01004078 return As<UShort4>(V(jit->builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004079 }
John Bauman66b8ab22014-05-06 15:57:45 -04004080
John Bauman19bac1e2014-05-06 15:23:49 -04004081 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004082 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004083 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04004084
Ben Clayton6f8e5652019-06-29 01:58:02 +01004085 return As<UShort4>(V(jit->builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004086 }
4087
John Bauman19bac1e2014-05-06 15:23:49 -04004088 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04004089 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004090 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04004091
Ben Clayton6f8e5652019-06-29 01:58:02 +01004092 return As<SByte8>(V(jit->builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004093 }
John Bauman66b8ab22014-05-06 15:57:45 -04004094
John Bauman19bac1e2014-05-06 15:23:49 -04004095 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04004096 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004097 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04004098
Ben Clayton6f8e5652019-06-29 01:58:02 +01004099 return As<SByte8>(V(jit->builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004100 }
John Bauman66b8ab22014-05-06 15:57:45 -04004101
John Bauman19bac1e2014-05-06 15:23:49 -04004102 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04004103 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004104 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04004105
Ben Clayton6f8e5652019-06-29 01:58:02 +01004106 return As<Byte8>(V(jit->builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004107 }
John Bauman66b8ab22014-05-06 15:57:45 -04004108
John Bauman19bac1e2014-05-06 15:23:49 -04004109 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04004110 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004111 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04004112
Ben Clayton6f8e5652019-06-29 01:58:02 +01004113 return As<Byte8>(V(jit->builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04004114 }
4115
4116 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004117 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004118 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
John Bauman89401822014-05-06 15:04:28 -04004119 }
4120
John Bauman19bac1e2014-05-06 15:23:49 -04004121 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004122 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004123 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman89401822014-05-06 15:04:28 -04004124 }
4125
John Bauman19bac1e2014-05-06 15:23:49 -04004126 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004127 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004128 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman89401822014-05-06 15:04:28 -04004129 }
4130
John Bauman19bac1e2014-05-06 15:23:49 -04004131 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004132 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004133 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04004134 }
4135
John Bauman19bac1e2014-05-06 15:23:49 -04004136 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004137 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004138 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04004139 }
4140
John Bauman19bac1e2014-05-06 15:23:49 -04004141 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04004142 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004143 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04004144 }
4145
John Bauman19bac1e2014-05-06 15:23:49 -04004146 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04004147 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004148 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04004149 }
4150
John Bauman19bac1e2014-05-06 15:23:49 -04004151 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04004152 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004153 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04004154
Ben Clayton6f8e5652019-06-29 01:58:02 +01004155 return As<Short4>(V(jit->builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004156 }
4157
John Bauman19bac1e2014-05-06 15:23:49 -04004158 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04004159 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004160 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04004161
Ben Clayton6f8e5652019-06-29 01:58:02 +01004162 return RValue<Short8>(V(jit->builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004163 }
4164
John Bauman19bac1e2014-05-06 15:23:49 -04004165 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004166 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004167 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04004168
Ben Clayton6f8e5652019-06-29 01:58:02 +01004169 return As<SByte8>(V(jit->builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004170 }
4171
Nicolas Capens33438a62017-09-27 11:47:35 -04004172 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004173 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004174 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04004175
Ben Clayton6f8e5652019-06-29 01:58:02 +01004176 return As<Byte8>(V(jit->builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004177 }
4178
Nicolas Capens3e7062b2017-01-17 14:01:33 -05004179 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04004180 {
4181 if(CPUID::supportsSSE4_1())
4182 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004183 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04004184
Ben Clayton6f8e5652019-06-29 01:58:02 +01004185 return RValue<UShort8>(V(jit->builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004186 }
4187 else
4188 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05004189 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
4190 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
4191
4192 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04004193 }
4194 }
4195
John Bauman19bac1e2014-05-06 15:23:49 -04004196 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004197 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004198 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04004199
Ben Clayton6f8e5652019-06-29 01:58:02 +01004200 return As<UShort4>(V(jit->builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004201 }
4202
John Bauman19bac1e2014-05-06 15:23:49 -04004203 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004204 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004205 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04004206
Ben Clayton6f8e5652019-06-29 01:58:02 +01004207 return RValue<UShort8>(V(jit->builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004208 }
4209
John Bauman19bac1e2014-05-06 15:23:49 -04004210 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004211 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004212 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04004213
Ben Clayton6f8e5652019-06-29 01:58:02 +01004214 return As<Short4>(V(jit->builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004215 }
4216
John Bauman19bac1e2014-05-06 15:23:49 -04004217 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004218 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004219 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04004220
Ben Clayton6f8e5652019-06-29 01:58:02 +01004221 return RValue<Short8>(V(jit->builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004222 }
4223
John Bauman19bac1e2014-05-06 15:23:49 -04004224 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004225 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004226 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04004227
Ben Clayton6f8e5652019-06-29 01:58:02 +01004228 return As<Short4>(V(jit->builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004229 }
4230
John Bauman19bac1e2014-05-06 15:23:49 -04004231 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004232 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004233 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04004234
Ben Clayton6f8e5652019-06-29 01:58:02 +01004235 return RValue<Short8>(V(jit->builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004236 }
4237
John Bauman19bac1e2014-05-06 15:23:49 -04004238 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004239 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004240 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04004241
Ben Clayton6f8e5652019-06-29 01:58:02 +01004242 return As<Int2>(V(jit->builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004243 }
4244
John Bauman19bac1e2014-05-06 15:23:49 -04004245 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004246 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004247 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04004248
Ben Clayton6f8e5652019-06-29 01:58:02 +01004249 return RValue<Int4>(V(jit->builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004250 }
4251
John Bauman19bac1e2014-05-06 15:23:49 -04004252 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004253 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004254 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04004255
Ben Clayton6f8e5652019-06-29 01:58:02 +01004256 return As<Int2>(V(jit->builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004257 }
4258
John Bauman19bac1e2014-05-06 15:23:49 -04004259 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004260 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004261 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04004262
Ben Clayton6f8e5652019-06-29 01:58:02 +01004263 return RValue<Int4>(V(jit->builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004264 }
4265
John Bauman19bac1e2014-05-06 15:23:49 -04004266 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004267 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004268 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04004269
Ben Clayton6f8e5652019-06-29 01:58:02 +01004270 return As<UInt2>(V(jit->builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004271 }
4272
John Bauman19bac1e2014-05-06 15:23:49 -04004273 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004274 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004275 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04004276
Ben Clayton6f8e5652019-06-29 01:58:02 +01004277 return RValue<UInt4>(V(jit->builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004278 }
4279
John Bauman19bac1e2014-05-06 15:23:49 -04004280 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
4281 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004282 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004283 }
4284
4285 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
4286 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004287 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004288 }
4289
4290 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
4291 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004292 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004293 }
4294
4295 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
4296 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004297 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004298 }
4299
4300 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004301 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004302 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004303
Ben Clayton6f8e5652019-06-29 01:58:02 +01004304 return As<Short4>(V(jit->builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004305 }
4306
John Bauman19bac1e2014-05-06 15:23:49 -04004307 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004308 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004309 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004310
Ben Clayton6f8e5652019-06-29 01:58:02 +01004311 return As<UShort4>(V(jit->builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004312 }
4313
John Bauman19bac1e2014-05-06 15:23:49 -04004314 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004315 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004316 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004317
Ben Clayton6f8e5652019-06-29 01:58:02 +01004318 return As<Int2>(V(jit->builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004319 }
4320
John Bauman19bac1e2014-05-06 15:23:49 -04004321 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004322 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004323 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004324
Ben Clayton6f8e5652019-06-29 01:58:02 +01004325 return RValue<Short8>(V(jit->builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004326 }
4327
John Bauman19bac1e2014-05-06 15:23:49 -04004328 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04004329 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004330 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004331
Ben Clayton6f8e5652019-06-29 01:58:02 +01004332 return RValue<UShort8>(V(jit->builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004333 }
4334
John Bauman19bac1e2014-05-06 15:23:49 -04004335 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004336 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004337 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004338
Ben Clayton6f8e5652019-06-29 01:58:02 +01004339 return RValue<Int4>(V(jit->builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004340 }
4341
John Bauman19bac1e2014-05-06 15:23:49 -04004342 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04004343 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004344 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04004345
Ben Clayton6f8e5652019-06-29 01:58:02 +01004346 return RValue<Int>(V(jit->builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04004347 }
4348
John Bauman19bac1e2014-05-06 15:23:49 -04004349 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04004350 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004351 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04004352
Ben Clayton6f8e5652019-06-29 01:58:02 +01004353 return RValue<Int>(V(jit->builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04004354 }
4355
Nicolas Capens01a97962017-07-28 17:30:51 -04004356 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04004357 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004358 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004359 }
4360
Nicolas Capens01a97962017-07-28 17:30:51 -04004361 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04004362 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004363 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004364 }
4365
Nicolas Capens01a97962017-07-28 17:30:51 -04004366 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04004367 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004368 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004369 }
4370
Nicolas Capens01a97962017-07-28 17:30:51 -04004371 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04004372 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004373 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004374 }
4375 }
Logan Chiene3191012018-08-24 22:01:50 +08004376#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004377
Ben Clayton60a3d6f2019-02-26 17:24:46 +00004378#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004379 // extractAll returns a vector containing the extracted n scalar value of
4380 // the vector vec.
4381 static std::vector<Value*> extractAll(Value* vec, int n)
4382 {
4383 std::vector<Value*> elements;
4384 elements.reserve(n);
4385 for (int i = 0; i < n; i++)
4386 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004387 auto el = V(jit->builder->CreateExtractElement(V(vec), i));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004388 elements.push_back(el);
4389 }
4390 return elements;
4391 }
4392
Ben Claytonca8e3d72019-05-14 16:51:05 +01004393 // toInt returns all the integer values in vals extended to a native width
4394 // integer.
4395 static std::vector<Value*> toInt(const std::vector<Value*>& vals, bool isSigned)
4396 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004397 auto intTy = ::llvm::Type::getIntNTy(jit->context, sizeof(int) * 8); // Natural integer width.
Ben Claytonca8e3d72019-05-14 16:51:05 +01004398 std::vector<Value*> elements;
4399 elements.reserve(vals.size());
4400 for (auto v : vals)
4401 {
4402 if (isSigned)
4403 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004404 elements.push_back(V(jit->builder->CreateSExt(V(v), intTy)));
Ben Claytonca8e3d72019-05-14 16:51:05 +01004405 }
4406 else
4407 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004408 elements.push_back(V(jit->builder->CreateZExt(V(v), intTy)));
Ben Claytonca8e3d72019-05-14 16:51:05 +01004409 }
4410 }
4411 return elements;
4412 }
4413
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004414 // toDouble returns all the float values in vals extended to doubles.
4415 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
4416 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004417 auto doubleTy = ::llvm::Type::getDoubleTy(jit->context);
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004418 std::vector<Value*> elements;
4419 elements.reserve(vals.size());
4420 for (auto v : vals)
4421 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004422 elements.push_back(V(jit->builder->CreateFPExt(V(v), doubleTy)));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004423 }
4424 return elements;
4425 }
4426
Ben Claytonca8e3d72019-05-14 16:51:05 +01004427 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return toInt(extractAll(v.value, 4), false); }
4428 std::vector<Value*> PrintValue::Ty<Int>::val(const RValue<Int>& v) { return toInt({v.value}, true); }
4429 std::vector<Value*> PrintValue::Ty<Int2>::val(const RValue<Int2>& v) { return toInt(extractAll(v.value, 2), true); }
4430 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return toInt(extractAll(v.value, 4), true); }
4431 std::vector<Value*> PrintValue::Ty<UInt>::val(const RValue<UInt>& v) { return toInt({v.value}, false); }
4432 std::vector<Value*> PrintValue::Ty<UInt2>::val(const RValue<UInt2>& v) { return toInt(extractAll(v.value, 2), false); }
4433 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return toInt(extractAll(v.value, 4), false); }
4434 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return toInt(extractAll(v.value, 4), true); }
4435 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return toInt(extractAll(v.value, 4), false); }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004436 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
4437 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
Ben Clayton6f8e5652019-06-29 01:58:02 +01004438 std::vector<Value*> PrintValue::Ty<const char*>::val(const char* v) { return {V(jit->builder->CreateGlobalStringPtr(v))}; }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004439
4440 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
4441 {
4442 // LLVM types used below.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004443 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
4444 auto intTy = ::llvm::Type::getIntNTy(jit->context, sizeof(int) * 8); // Natural integer width.
4445 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(jit->context);
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004446 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
4447
Ben Clayton6f8e5652019-06-29 01:58:02 +01004448 auto func = jit->module->getOrInsertFunction("printf", funcTy);
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004449
4450 // Build the printf format message string.
4451 std::string str;
4452 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
4453 if (function != nullptr) { str += "%s "; }
4454 str += fmt;
4455
4456 // Perform subsitution on all '{n}' bracketed indices in the format
4457 // message.
4458 int i = 0;
4459 for (const PrintValue& arg : args)
4460 {
4461 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
4462 }
4463
4464 ::llvm::SmallVector<::llvm::Value*, 8> vals;
4465
4466 // The format message is always the first argument.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004467 vals.push_back(jit->builder->CreateGlobalStringPtr(str));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004468
4469 // Add optional file, line and function info if provided.
4470 if (file != nullptr)
4471 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004472 vals.push_back(jit->builder->CreateGlobalStringPtr(file));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004473 if (line > 0)
4474 {
4475 vals.push_back(::llvm::ConstantInt::get(intTy, line));
4476 }
4477 }
4478 if (function != nullptr)
4479 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004480 vals.push_back(jit->builder->CreateGlobalStringPtr(function));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004481 }
4482
4483 // Add all format arguments.
4484 for (const PrintValue& arg : args)
4485 {
4486 for (auto val : arg.values)
4487 {
4488 vals.push_back(V(val));
4489 }
4490 }
4491
Ben Clayton6f8e5652019-06-29 01:58:02 +01004492 jit->builder->CreateCall(func, vals);
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004493 }
4494#endif // ENABLE_RR_PRINT
4495
Ben Claytonac07ed82019-03-26 14:17:41 +00004496 void Nop()
4497 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004498 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
Ben Claytonac07ed82019-03-26 14:17:41 +00004499 auto funcTy = ::llvm::FunctionType::get(voidTy, {}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004500 auto func = jit->module->getOrInsertFunction("nop", funcTy);
4501 jit->builder->CreateCall(func);
Ben Claytonac07ed82019-03-26 14:17:41 +00004502 }
4503
// Forwards to DebugInfo::EmitLocation() when debug info is compiled in
// (ENABLE_RR_DEBUG_INFO) and a debug info object exists; otherwise a no-op.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (jit->debugInfo == nullptr)
	{
		return;
	}

	jit->debugInfo->EmitLocation();
#endif // ENABLE_RR_DEBUG_INFO
}
4513
4514 void EmitDebugVariable(Value* value)
4515 {
4516#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton6f8e5652019-06-29 01:58:02 +01004517 if (jit->debugInfo != nullptr)
Ben Claytonac07ed82019-03-26 14:17:41 +00004518 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004519 jit->debugInfo->EmitVariable(value);
Ben Claytonac07ed82019-03-26 14:17:41 +00004520 }
4521#endif // ENABLE_RR_DEBUG_INFO
4522 }
4523
// Forwards to DebugInfo::Flush() when debug info is compiled in
// (ENABLE_RR_DEBUG_INFO) and a debug info object exists; otherwise a no-op.
void FlushDebug()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (jit->debugInfo == nullptr)
	{
		return;
	}

	jit->debugInfo->Flush();
#endif // ENABLE_RR_DEBUG_INFO
}
4533
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004534} // namespace rr
4535
4536// ------------------------------ Coroutines ------------------------------
4537
4538namespace {
// Magic values returned by llvm.coro.suspend.
// See: https://llvm.org/docs/Coroutines.html#llvm-coro-suspend-intrinsic
enum SuspendAction
{
	SuspendActionSuspend = -1, // The coroutine is suspending.
	SuspendActionResume = 0,   // The coroutine has been resumed.
	SuspendActionDestroy = 1   // The coroutine is being destroyed.
};
4547
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004548
Ben Clayton16da2812019-07-09 23:28:51 +01004549void promoteFunctionToCoroutine()
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004550{
Ben Clayton16da2812019-07-09 23:28:51 +01004551 ASSERT(jit->coroutine.id == nullptr);
4552
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004553 // Types
Ben Clayton6f8e5652019-06-29 01:58:02 +01004554 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
4555 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
4556 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
4557 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
4558 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(jit->context);
Ben Clayton16da2812019-07-09 23:28:51 +01004559 auto promiseTy = jit->coroutine.yieldType;
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004560 auto promisePtrTy = promiseTy->getPointerTo();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004561
4562 // LLVM intrinsics
Ben Clayton6f8e5652019-06-29 01:58:02 +01004563 auto coro_id = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::coro_id);
4564 auto coro_size = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::coro_size, {i32Ty});
4565 auto coro_begin = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::coro_begin);
4566 auto coro_resume = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_resume);
4567 auto coro_end = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_end);
4568 auto coro_free = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_free);
4569 auto coro_destroy = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_destroy);
4570 auto coro_promise = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_promise);
4571 auto coro_done = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_done);
4572 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_suspend);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004573
4574 auto allocFrameTy = ::llvm::FunctionType::get(i8PtrTy, {i32Ty}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004575 auto allocFrame = jit->module->getOrInsertFunction("coroutine_alloc_frame", allocFrameTy);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004576 auto freeFrameTy = ::llvm::FunctionType::get(voidTy, {i8PtrTy}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004577 auto freeFrame = jit->module->getOrInsertFunction("coroutine_free_frame", freeFrameTy);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004578
Ben Clayton16da2812019-07-09 23:28:51 +01004579 auto oldInsertionPoint = jit->builder->saveIP();
4580
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004581 // Build the coroutine_await() function:
4582 //
4583 // bool coroutine_await(CoroutineHandle* handle, YieldType* out)
4584 // {
4585 // if (llvm.coro.done(handle))
4586 // {
4587 // return false;
4588 // }
4589 // else
4590 // {
4591 // *value = (T*)llvm.coro.promise(handle);
4592 // llvm.coro.resume(handle);
4593 // return true;
4594 // }
4595 // }
4596 //
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004597 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004598 auto args = jit->coroutine.await->arg_begin();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004599 auto handle = args++;
4600 auto outPtr = args++;
Ben Clayton6f8e5652019-06-29 01:58:02 +01004601 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "co_await", jit->coroutine.await));
4602 auto doneBlock = llvm::BasicBlock::Create(jit->context, "done", jit->coroutine.await);
4603 auto resumeBlock = llvm::BasicBlock::Create(jit->context, "resume", jit->coroutine.await);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004604
Ben Clayton6f8e5652019-06-29 01:58:02 +01004605 auto done = jit->builder->CreateCall(coro_done, {handle}, "done");
4606 jit->builder->CreateCondBr(done, doneBlock, resumeBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004607
Ben Clayton6f8e5652019-06-29 01:58:02 +01004608 jit->builder->SetInsertPoint(doneBlock);
4609 jit->builder->CreateRet(::llvm::ConstantInt::getFalse(i1Ty));
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004610
Ben Clayton6f8e5652019-06-29 01:58:02 +01004611 jit->builder->SetInsertPoint(resumeBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004612 auto promiseAlignment = ::llvm::ConstantInt::get(i32Ty, 4); // TODO: Get correct alignment.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004613 auto promisePtr = jit->builder->CreateCall(coro_promise, {handle, promiseAlignment, ::llvm::ConstantInt::get(i1Ty, 0)});
4614 auto promise = jit->builder->CreateLoad(jit->builder->CreatePointerCast(promisePtr, promisePtrTy));
4615 jit->builder->CreateStore(promise, outPtr);
4616 jit->builder->CreateCall(coro_resume, {handle});
4617 jit->builder->CreateRet(::llvm::ConstantInt::getTrue(i1Ty));
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004618 }
4619
4620 // Build the coroutine_destroy() function:
4621 //
4622 // void coroutine_destroy(CoroutineHandle* handle)
4623 // {
4624 // llvm.coro.destroy(handle);
4625 // }
4626 //
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004627 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004628 auto handle = jit->coroutine.destroy->arg_begin();
4629 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->coroutine.destroy));
4630 jit->builder->CreateCall(coro_destroy, {handle});
4631 jit->builder->CreateRetVoid();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004632 }
4633
4634 // Begin building the main coroutine_begin() function.
4635 //
4636 // CoroutineHandle* coroutine_begin(<Arguments>)
4637 // {
4638 // YieldType promise;
4639 // auto id = llvm.coro.id(0, &promise, nullptr, nullptr);
4640 // void* frame = coroutine_alloc_frame(llvm.coro.size.i32());
4641 // CoroutineHandle *handle = llvm.coro.begin(id, frame);
4642 //
4643 // ... <REACTOR CODE> ...
4644 //
4645 // end:
4646 // SuspendAction action = llvm.coro.suspend(none, true /* final */); // <-- RESUME POINT
4647 // switch (action)
4648 // {
4649 // case SuspendActionResume:
4650 // UNREACHABLE(); // Illegal to resume after final suspend.
4651 // case SuspendActionDestroy:
4652 // goto destroy;
4653 // default: // (SuspendActionSuspend)
4654 // goto suspend;
4655 // }
4656 //
4657 // destroy:
4658 // coroutine_free_frame(llvm.coro.free(id, handle));
4659 // goto suspend;
4660 //
4661 // suspend:
4662 // llvm.coro.end(handle, false);
4663 // return handle;
4664 // }
4665 //
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004666
4667#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton16da2812019-07-09 23:28:51 +01004668 jit->debugInfo = std::unique_ptr<rr::DebugInfo>(new rr::DebugInfo(jit->builder.get(), &jit->context, jit->module.get(), jit->function));
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004669#endif // ENABLE_RR_DEBUG_INFO
4670
Ben Clayton6f8e5652019-06-29 01:58:02 +01004671 jit->coroutine.suspendBlock = llvm::BasicBlock::Create(jit->context, "suspend", jit->function);
4672 jit->coroutine.endBlock = llvm::BasicBlock::Create(jit->context, "end", jit->function);
4673 jit->coroutine.destroyBlock = llvm::BasicBlock::Create(jit->context, "destroy", jit->function);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004674
Ben Clayton16da2812019-07-09 23:28:51 +01004675 jit->builder->SetInsertPoint(jit->coroutine.entryBlock, jit->coroutine.entryBlock->begin());
4676 jit->coroutine.promise = jit->builder->CreateAlloca(promiseTy, nullptr, "promise");
Ben Clayton6f8e5652019-06-29 01:58:02 +01004677 jit->coroutine.id = jit->builder->CreateCall(coro_id, {
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004678 ::llvm::ConstantInt::get(i32Ty, 0),
Ben Clayton6f8e5652019-06-29 01:58:02 +01004679 jit->builder->CreatePointerCast(jit->coroutine.promise, i8PtrTy),
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004680 ::llvm::ConstantPointerNull::get(i8PtrTy),
4681 ::llvm::ConstantPointerNull::get(i8PtrTy),
4682 });
Ben Clayton6f8e5652019-06-29 01:58:02 +01004683 auto size = jit->builder->CreateCall(coro_size, {});
4684 auto frame = jit->builder->CreateCall(allocFrame, {size});
4685 jit->coroutine.handle = jit->builder->CreateCall(coro_begin, {jit->coroutine.id, frame});
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004686
4687 // Build the suspend block
Ben Clayton6f8e5652019-06-29 01:58:02 +01004688 jit->builder->SetInsertPoint(jit->coroutine.suspendBlock);
4689 jit->builder->CreateCall(coro_end, {jit->coroutine.handle, ::llvm::ConstantInt::get(i1Ty, 0)});
4690 jit->builder->CreateRet(jit->coroutine.handle);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004691
4692 // Build the end block
Ben Clayton6f8e5652019-06-29 01:58:02 +01004693 jit->builder->SetInsertPoint(jit->coroutine.endBlock);
4694 auto action = jit->builder->CreateCall(coro_suspend, {
4695 ::llvm::ConstantTokenNone::get(jit->context),
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004696 ::llvm::ConstantInt::get(i1Ty, 1), // final: true
4697 });
Ben Clayton6f8e5652019-06-29 01:58:02 +01004698 auto switch_ = jit->builder->CreateSwitch(action, jit->coroutine.suspendBlock, 3);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004699 // switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), trapBlock); // TODO: Trap attempting to resume after final suspend
Ben Clayton6f8e5652019-06-29 01:58:02 +01004700 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), jit->coroutine.destroyBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004701
4702 // Build the destroy block
Ben Clayton6f8e5652019-06-29 01:58:02 +01004703 jit->builder->SetInsertPoint(jit->coroutine.destroyBlock);
4704 auto memory = jit->builder->CreateCall(coro_free, {jit->coroutine.id, jit->coroutine.handle});
4705 jit->builder->CreateCall(freeFrame, {memory});
4706 jit->builder->CreateBr(jit->coroutine.suspendBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004707
Ben Clayton16da2812019-07-09 23:28:51 +01004708 // Switch back to original insert point to continue building the coroutine.
4709 jit->builder->restoreIP(oldInsertionPoint);
4710}
4711
4712} // anonymous namespace
4713
4714namespace rr {
4715
4716void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params)
4717{
4718 // Coroutines are initially created as a regular function.
4719 // Upon the first call to Yield(), the function is promoted to a true
4720 // coroutine.
4721 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
4722 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
4723 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(jit->context);
4724 auto handleTy = i8PtrTy;
4725 auto boolTy = i1Ty;
4726 auto promiseTy = T(YieldType);
4727 auto promisePtrTy = promiseTy->getPointerTo();
4728
4729 jit->function = rr::createFunction("coroutine_begin", handleTy, T(Params));
4730 jit->coroutine.await = rr::createFunction("coroutine_await", boolTy, {handleTy, promisePtrTy});
4731 jit->coroutine.destroy = rr::createFunction("coroutine_destroy", voidTy, {handleTy});
4732 jit->coroutine.yieldType = promiseTy;
4733 jit->coroutine.entryBlock = llvm::BasicBlock::Create(jit->context, "function", jit->function);
4734
4735 jit->builder->SetInsertPoint(jit->coroutine.entryBlock);
John Bauman89401822014-05-06 15:04:28 -04004736}
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004737
4738void Nucleus::yield(Value* val)
4739{
Ben Clayton16da2812019-07-09 23:28:51 +01004740 if (jit->coroutine.id == nullptr)
4741 {
4742 // First call to yield().
4743 // Promote the function to a full coroutine.
4744 promoteFunctionToCoroutine();
4745 ASSERT(jit->coroutine.id != nullptr);
4746 }
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004747
4748 // promise = val;
4749 //
4750 // auto action = llvm.coro.suspend(none, false /* final */); // <-- RESUME POINT
4751 // switch (action)
4752 // {
4753 // case SuspendActionResume:
4754 // goto resume;
4755 // case SuspendActionDestroy:
4756 // goto destroy;
4757 // default: // (SuspendActionSuspend)
4758 // goto suspend;
4759 // }
4760 // resume:
4761 //
4762
4763 RR_DEBUG_INFO_UPDATE_LOC();
4764 Variable::materializeAll();
4765
4766 // Types
Ben Clayton6f8e5652019-06-29 01:58:02 +01004767 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
4768 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004769
4770 // Intrinsics
Ben Clayton6f8e5652019-06-29 01:58:02 +01004771 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_suspend);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004772
4773 // Create a block to resume execution.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004774 auto resumeBlock = llvm::BasicBlock::Create(jit->context, "resume", jit->function);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004775
4776 // Store the promise (yield value)
Ben Clayton6f8e5652019-06-29 01:58:02 +01004777 jit->builder->CreateStore(V(val), jit->coroutine.promise);
4778 auto action = jit->builder->CreateCall(coro_suspend, {
4779 ::llvm::ConstantTokenNone::get(jit->context),
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004780 ::llvm::ConstantInt::get(i1Ty, 0), // final: true
4781 });
Ben Clayton6f8e5652019-06-29 01:58:02 +01004782 auto switch_ = jit->builder->CreateSwitch(action, jit->coroutine.suspendBlock, 3);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004783 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), resumeBlock);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004784 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), jit->coroutine.destroyBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004785
4786 // Continue building in the resume block.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004787 jit->builder->SetInsertPoint(resumeBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004788}
4789
Ben Clayton6897e9b2019-07-16 17:27:27 +01004790std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004791{
Ben Clayton16da2812019-07-09 23:28:51 +01004792 bool isCoroutine = jit->coroutine.id != nullptr;
4793 if (isCoroutine)
4794 {
4795 jit->builder->CreateBr(jit->coroutine.endBlock);
4796 }
4797 else
4798 {
4799 // Coroutine without a Yield acts as a regular function.
4800 // The 'coroutine_begin' function returns a nullptr for the coroutine
4801 // handle.
4802 jit->builder->CreateRet(llvm::Constant::getNullValue(jit->function->getReturnType()));
4803 // The 'coroutine_await' function always returns false (coroutine done).
4804 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->coroutine.await));
4805 jit->builder->CreateRet(llvm::Constant::getNullValue(jit->coroutine.await->getReturnType()));
4806 // The 'coroutine_destroy' does nothing, returns void.
4807 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->coroutine.destroy));
4808 jit->builder->CreateRetVoid();
4809 }
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004810
4811#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton6f8e5652019-06-29 01:58:02 +01004812 if (jit->debugInfo != nullptr)
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004813 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004814 jit->debugInfo->Finalize();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004815 }
4816#endif // ENABLE_RR_DEBUG_INFO
4817
4818 if(false)
4819 {
4820 std::error_code error;
4821 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004822 jit->module->print(file, 0);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004823 }
4824
Ben Clayton16da2812019-07-09 23:28:51 +01004825 if (isCoroutine)
4826 {
4827 // Run manadory coroutine transforms.
4828 llvm::legacy::PassManager pm;
4829 pm.add(llvm::createCoroEarlyPass());
4830 pm.add(llvm::createCoroSplitPass());
4831 pm.add(llvm::createCoroElidePass());
4832 pm.add(llvm::createBarrierNoopPass());
4833 pm.add(llvm::createCoroCleanupPass());
4834 pm.run(*jit->module);
4835 }
4836
4837#if defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
4838 {
4839 llvm::legacy::PassManager pm;
4840 pm.add(llvm::createVerifierPass());
4841 pm.run(*jit->module);
4842 }
4843#endif // defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004844
Ben Clayton55bc37a2019-07-04 12:17:12 +01004845 auto cfg = cfgEdit.apply(jit->config);
4846 jit->optimize(cfg);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004847
4848 if(false)
4849 {
4850 std::error_code error;
4851 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004852 jit->module->print(file, 0);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004853 }
4854
4855 llvm::Function *funcs[Nucleus::CoroutineEntryCount];
Ben Clayton6f8e5652019-06-29 01:58:02 +01004856 funcs[Nucleus::CoroutineEntryBegin] = jit->function;
4857 funcs[Nucleus::CoroutineEntryAwait] = jit->coroutine.await;
4858 funcs[Nucleus::CoroutineEntryDestroy] = jit->coroutine.destroy;
Ben Clayton55bc37a2019-07-04 12:17:12 +01004859 auto routine = jit->acquireRoutine(funcs, Nucleus::CoroutineEntryCount, cfg);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004860 jit.reset();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004861
4862 return routine;
4863}
4864
4865} // namespace rr