blob: 7bf902ee07019d2dfcaf742864beb78d732d6675 [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Ben Claytonac07ed82019-03-26 14:17:41 +000017#include "LLVMReactor.hpp"
18#include "LLVMReactorDebugInfo.hpp"
John Bauman89401822014-05-06 15:04:28 -040019
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040020#include "x86.hpp"
21#include "CPUID.hpp"
22#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040023#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040024#include "MutexLock.hpp"
25
26#undef min
27#undef max
28
Ben Clayton09a7f452019-04-25 15:22:43 +010029#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which will be
// treated as an error in SwiftShader targets.
32#pragma clang diagnostic push
33#pragma clang diagnostic ignored "-Wextra-semi"
34#endif // defined(__clang__)
35
Ben Clayton5875be52019-04-11 14:57:40 -040036#include "llvm/Analysis/LoopPass.h"
37#include "llvm/ExecutionEngine/ExecutionEngine.h"
38#include "llvm/ExecutionEngine/JITSymbol.h"
39#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
40#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
41#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
42#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
43#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
44#include "llvm/ExecutionEngine/SectionMemoryManager.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DataLayout.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/GlobalVariable.h"
Ben Clayton5875be52019-04-11 14:57:40 -040049#include "llvm/IR/Intrinsics.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010050#include "llvm/IR/IRBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040051#include "llvm/IR/LegacyPassManager.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010052#include "llvm/IR/LLVMContext.h"
Ben Clayton5875be52019-04-11 14:57:40 -040053#include "llvm/IR/Mangler.h"
54#include "llvm/IR/Module.h"
Ben Clayton4b944652019-05-02 10:56:19 +010055#include "llvm/IR/Verifier.h"
Ben Clayton5875be52019-04-11 14:57:40 -040056#include "llvm/Support/Error.h"
57#include "llvm/Support/TargetSelect.h"
58#include "llvm/Target/TargetOptions.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010059#include "llvm/Transforms/Coroutines.h"
Ben Clayton5875be52019-04-11 14:57:40 -040060#include "llvm/Transforms/InstCombine/InstCombine.h"
Ben Clayton1c82c7b2019-04-30 12:49:27 +010061#include "llvm/Transforms/IPO.h"
62#include "llvm/Transforms/IPO/PassManagerBuilder.h"
Ben Clayton5875be52019-04-11 14:57:40 -040063#include "llvm/Transforms/Scalar.h"
64#include "llvm/Transforms/Scalar/GVN.h"
Ben Clayton20507fa2019-04-20 01:40:15 -040065
Ben Clayton09a7f452019-04-25 15:22:43 +010066#if defined(__clang__)
67#pragma clang diagnostic pop
68#endif // defined(__clang__)
69
Ben Clayton5875be52019-04-11 14:57:40 -040070#define ARGS(...) {__VA_ARGS__}
71#define CreateCall2 CreateCall
72#define CreateCall3 CreateCall
Logan Chien0eedc8c2018-08-21 09:34:28 +080073
Ben Clayton5875be52019-04-11 14:57:40 -040074#include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080075
John Bauman89401822014-05-06 15:04:28 -040076#include <fstream>
Ben Claytoncee3dff2019-05-22 12:01:22 +010077#include <iostream>
78#include <mutex>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000079#include <numeric>
80#include <thread>
John Bauman89401822014-05-06 15:04:28 -040081
Nicolas Capens47dc8672017-04-25 12:54:39 -040082#if defined(__i386__) || defined(__x86_64__)
83#include <xmmintrin.h>
84#endif
85
Logan Chien40a60052018-09-26 19:03:53 +080086#include <math.h>
87
Nicolas Capenscb122582014-05-06 23:34:44 -040088#if defined(__x86_64__) && defined(_WIN32)
Ben Clayton2f58df32019-06-23 21:29:25 +010089 extern "C" void X86CompilationCallback()
90 {
91 UNIMPLEMENTED("X86CompilationCallback");
92 }
93#endif
94
95#if defined(_WIN64)
96 extern "C" void __chkstk();
97#elif defined(_WIN32)
98 extern "C" void _chkstk();
John Bauman66b8ab22014-05-06 15:57:45 -040099#endif
100
Nicolas Capens48461502018-08-06 14:20:45 -0400101namespace rr
Logan Chien52cde602018-09-03 19:37:57 +0800102{
Ben Clayton6f8e5652019-06-29 01:58:02 +0100103 void* resolveExternalSymbol(const char*);
Logan Chien52cde602018-09-03 19:37:57 +0800104}
105
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400106namespace
107{
Ben Clayton55bc37a2019-07-04 12:17:12 +0100108 // Default configuration settings. Must be accessed under mutex lock.
109 std::mutex defaultConfigLock;
110 rr::Config &defaultConfig()
111 {
112 // This uses a static in a function to avoid the cost of a global static
113 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
114 static rr::Config config = rr::Config::Edit()
115 .set(rr::Optimization::Level::Default)
116 .add(rr::Optimization::Pass::ScalarReplAggregates)
117 .add(rr::Optimization::Pass::InstructionCombining)
118 .apply({});
119 return config;
120 }
121
// Cache is a small key-value store whose operations are serialized by an
// internal mutex.
template <typename KEY, typename VALUE>
class Cache
{
public:
	Cache() = default;

	// Copies the entries of other, holding other's mutex for the duration
	// of the copy.
	Cache(const Cache &other)
	{
		std::lock_guard<std::mutex> guard(other.mutex);
		map = other.map;
	}

	// Returns the value stored under key. If there is no entry for key,
	// create() is called (with the cache's mutex held), its result is
	// stored under key, and that result is returned.
	VALUE getOrCreate(KEY key, std::function<VALUE()> create)
	{
		std::lock_guard<std::mutex> guard(mutex);
		const auto found = map.find(key);
		if (found == map.end())
		{
			VALUE created = create();
			map.emplace(key, created);
			return created;
		}
		return found->second;
	}

private:
	mutable std::mutex mutex; // mutable so the copy constructor can lock a const source.
	std::unordered_map<KEY, VALUE> map;
};
155
Ben Clayton6f8e5652019-06-29 01:58:02 +0100156 // JITGlobals is a singleton that holds all the immutable machine specific
157 // information for the host device.
Ben Clayton52ce1e92019-07-15 11:41:00 +0100158 class JITGlobals
Ben Clayton6f8e5652019-06-29 01:58:02 +0100159 {
160 public:
Ben Clayton52ce1e92019-07-15 11:41:00 +0100161 using TargetMachineSPtr = std::shared_ptr<llvm::TargetMachine>;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100162
Ben Clayton52ce1e92019-07-15 11:41:00 +0100163 static JITGlobals * get();
164
165 const std::string mcpu;
166 const std::vector<std::string> mattrs;
167 const char* const march;
168 const llvm::TargetOptions targetOptions;
169 const llvm::DataLayout dataLayout;
170
171 TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100172
173 private:
Ben Clayton52ce1e92019-07-15 11:41:00 +0100174 static JITGlobals create();
175 static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);
176 JITGlobals(const char *mcpu,
177 const std::vector<std::string> &mattrs,
178 const char *march,
179 const llvm::TargetOptions &targetOptions,
180 const llvm::DataLayout &dataLayout);
181 JITGlobals(const JITGlobals&) = default;
182
183 // The cache key here is actually a rr::Optimization::Level. We use int
184 // as 'enum class' types do not provide builtin hash functions until
185 // C++14. See: https://stackoverflow.com/a/29618545.
186 Cache<int, TargetMachineSPtr> targetMachines;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100187 };
188
Ben Clayton52ce1e92019-07-15 11:41:00 +0100189 JITGlobals * JITGlobals::get()
Ben Clayton6f8e5652019-06-29 01:58:02 +0100190 {
Ben Clayton52ce1e92019-07-15 11:41:00 +0100191 static JITGlobals instance = create();
Ben Clayton6f8e5652019-06-29 01:58:02 +0100192 return &instance;
193 }
194
Ben Clayton52ce1e92019-07-15 11:41:00 +0100195 JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100196 {
Ben Clayton52ce1e92019-07-15 11:41:00 +0100197 return targetMachines.getOrCreate(static_cast<int>(optlevel), [&]() {
198 return TargetMachineSPtr(llvm::EngineBuilder()
199#ifdef ENABLE_RR_DEBUG_INFO
Ben Claytone031f362019-07-20 12:35:40 +0100200 .setOptLevel(toLLVM(rr::Optimization::Level::None))
Ben Clayton52ce1e92019-07-15 11:41:00 +0100201#else
202 .setOptLevel(toLLVM(optlevel))
203#endif // ENABLE_RR_DEBUG_INFO
204 .setMCPU(mcpu)
205 .setMArch(march)
206 .setMAttrs(mattrs)
207 .setTargetOptions(targetOptions)
208 .selectTarget());
209 });
210 }
Ben Clayton49f80512019-07-04 17:30:54 +0100211
Ben Clayton52ce1e92019-07-15 11:41:00 +0100212 JITGlobals JITGlobals::create()
213 {
214 struct LLVMInitializer
215 {
216 LLVMInitializer()
217 {
218 llvm::InitializeNativeTarget();
219 llvm::InitializeNativeTargetAsmPrinter();
220 llvm::InitializeNativeTargetAsmParser();
221 }
222 };
223 static LLVMInitializer initializeLLVM;
224
225 auto mcpu = llvm::sys::getHostCPUName();
226
Ben Clayton6f8e5652019-06-29 01:58:02 +0100227 llvm::StringMap<bool> features;
228 bool ok = llvm::sys::getHostCPUFeatures(features);
229
230#if defined(__i386__) || defined(__x86_64__) || \
231(defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
232 ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
233#else
234 (void) ok; // getHostCPUFeatures always returns false on other platforms
Ben Claytonac07ed82019-03-26 14:17:41 +0000235#endif
236
Ben Clayton52ce1e92019-07-15 11:41:00 +0100237 std::vector<std::string> mattrs;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100238 for (auto &feature : features)
239 {
240 if (feature.second) { mattrs.push_back(feature.first()); }
241 }
242
Ben Clayton52ce1e92019-07-15 11:41:00 +0100243 const char* march = nullptr;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100244#if defined(__x86_64__)
Ben Clayton49f80512019-07-04 17:30:54 +0100245 march = "x86-64";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100246#elif defined(__i386__)
Ben Clayton49f80512019-07-04 17:30:54 +0100247 march = "x86";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100248#elif defined(__aarch64__)
Ben Clayton49f80512019-07-04 17:30:54 +0100249 march = "arm64";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100250#elif defined(__arm__)
Ben Clayton49f80512019-07-04 17:30:54 +0100251 march = "arm";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100252#elif defined(__mips__)
253#if defined(__mips64)
Ben Clayton49f80512019-07-04 17:30:54 +0100254 march = "mips64el";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100255#else
Ben Clayton49f80512019-07-04 17:30:54 +0100256 march = "mipsel";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100257#endif
258#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
Ben Clayton49f80512019-07-04 17:30:54 +0100259 march = "ppc64le";
Ben Clayton6f8e5652019-06-29 01:58:02 +0100260#else
261 #error "unknown architecture"
262#endif
263
Ben Clayton52ce1e92019-07-15 11:41:00 +0100264 llvm::TargetOptions targetOptions;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100265 targetOptions.UnsafeFPMath = false;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100266
267 auto targetMachine = std::unique_ptr<llvm::TargetMachine>(
268 llvm::EngineBuilder()
269 .setOptLevel(llvm::CodeGenOpt::None)
Ben Clayton49f80512019-07-04 17:30:54 +0100270 .setMCPU(mcpu)
271 .setMArch(march)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100272 .setMAttrs(mattrs)
273 .setTargetOptions(targetOptions)
274 .selectTarget());
275
Ben Clayton52ce1e92019-07-15 11:41:00 +0100276 auto dataLayout = targetMachine->createDataLayout();
277
278 return JITGlobals(mcpu.data(), mattrs, march, targetOptions, dataLayout);
279 }
280
281 llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
282 {
283 switch (level)
284 {
285 case rr::Optimization::Level::None: return ::llvm::CodeGenOpt::None;
286 case rr::Optimization::Level::Less: return ::llvm::CodeGenOpt::Less;
287 case rr::Optimization::Level::Default: return ::llvm::CodeGenOpt::Default;
288 case rr::Optimization::Level::Aggressive: return ::llvm::CodeGenOpt::Aggressive;
289 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
290 }
291 return ::llvm::CodeGenOpt::Default;
292 }
293
294 JITGlobals::JITGlobals(const char* mcpu,
295 const std::vector<std::string> &mattrs,
296 const char* march,
297 const llvm::TargetOptions &targetOptions,
298 const llvm::DataLayout &dataLayout) :
299 mcpu(mcpu),
300 mattrs(mattrs),
301 march(march),
302 targetOptions(targetOptions),
303 dataLayout(dataLayout)
304 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100305 }
306
307 // JITRoutine is a rr::Routine that holds a LLVM JIT session, compiler and
308 // object layer as each routine may require different target machine
309 // settings and no Reactor routine directly links against another.
310 class JITRoutine : public rr::Routine
311 {
312 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
313 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
314 public:
Ben Clayton68cfc782019-06-29 12:31:08 +0100315 JITRoutine(
316 std::unique_ptr<llvm::Module> module,
317 llvm::Function **funcs,
318 size_t count,
Ben Clayton55bc37a2019-07-04 12:17:12 +0100319 const rr::Config &config) :
Ben Clayton6f8e5652019-06-29 01:58:02 +0100320 resolver(createLegacyLookupResolver(
321 session,
322 [&](const std::string &name) {
323 void *func = rr::resolveExternalSymbol(name.c_str());
324 if (func != nullptr)
325 {
326 return llvm::JITSymbol(
327 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
328 }
329 return objLayer.findSymbol(name, true);
330 },
331 [](llvm::Error err) {
332 if (err)
333 {
334 // TODO: Log the symbol resolution errors.
335 return;
336 }
337 })),
Ben Clayton52ce1e92019-07-15 11:41:00 +0100338 targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel())),
Ben Clayton6f8e5652019-06-29 01:58:02 +0100339 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
340 objLayer(
341 session,
342 [this](llvm::orc::VModuleKey) {
343 return ObjLayer::Resources{std::make_shared<llvm::SectionMemoryManager>(), resolver};
344 },
345 ObjLayer::NotifyLoadedFtor(),
346 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
347#ifdef ENABLE_RR_DEBUG_INFO
348 rr::DebugInfo::NotifyObjectEmitted(Obj, L);
349#endif // ENABLE_RR_DEBUG_INFO
350 },
351 [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
352#ifdef ENABLE_RR_DEBUG_INFO
353 rr::DebugInfo::NotifyFreeingObject(Obj);
354#endif // ENABLE_RR_DEBUG_INFO
355 }
356 ),
357 addresses(count)
358 {
359 std::vector<std::string> mangledNames(count);
360 for (size_t i = 0; i < count; i++)
361 {
362 auto func = funcs[i];
363 static size_t numEmittedFunctions = 0;
364 std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
365 func->setName(name);
366 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
367 func->setDoesNotThrow();
368
369 llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
370 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, JITGlobals::get()->dataLayout);
371 }
372
373 auto moduleKey = session.allocateVModule();
374
375 // Once the module is passed to the compileLayer, the
376 // llvm::Functions are freed. Make sure funcs are not referenced
377 // after this point.
378 funcs = nullptr;
379
380 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(module)));
381
382 // Resolve the function addresses.
383 for (size_t i = 0; i < count; i++)
384 {
385 auto symbol = compileLayer.findSymbolIn(moduleKey, mangledNames[i], false);
386 if(auto address = symbol.getAddress())
387 {
388 addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(address.get()));
389 }
390 }
391 }
392
393 const void *getEntry(int index) override
394 {
395 return addresses[index];
396 }
397
398 private:
399 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
Ben Clayton52ce1e92019-07-15 11:41:00 +0100400 std::shared_ptr<llvm::TargetMachine> targetMachine;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100401 llvm::orc::ExecutionSession session;
402 CompileLayer compileLayer;
403 ObjLayer objLayer;
404 std::vector<const void *> addresses;
405 };
406
407 // JITBuilder holds all the LLVM state for building routines.
408 class JITBuilder
409 {
410 public:
Ben Clayton55bc37a2019-07-04 12:17:12 +0100411 JITBuilder(const rr::Config &config) :
412 config(config),
Ben Clayton6f8e5652019-06-29 01:58:02 +0100413 module(new llvm::Module("", context)),
414 builder(new llvm::IRBuilder<>(context))
415 {
416 module->setDataLayout(JITGlobals::get()->dataLayout);
417 }
418
Ben Clayton55bc37a2019-07-04 12:17:12 +0100419 void optimize(const rr::Config &cfg)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100420 {
Ben Clayton55bc37a2019-07-04 12:17:12 +0100421
Ben Clayton6f8e5652019-06-29 01:58:02 +0100422#ifdef ENABLE_RR_DEBUG_INFO
423 if (debugInfo != nullptr)
424 {
425 return; // Don't optimize if we're generating debug info.
426 }
427#endif // ENABLE_RR_DEBUG_INFO
428
429 std::unique_ptr<llvm::legacy::PassManager> passManager(
430 new llvm::legacy::PassManager());
431
Ben Clayton55bc37a2019-07-04 12:17:12 +0100432 for(auto pass : cfg.getOptimization().getPasses())
Ben Clayton6f8e5652019-06-29 01:58:02 +0100433 {
Ben Clayton55bc37a2019-07-04 12:17:12 +0100434 switch(pass)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100435 {
Ben Clayton55bc37a2019-07-04 12:17:12 +0100436 case rr::Optimization::Pass::Disabled: break;
437 case rr::Optimization::Pass::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
438 case rr::Optimization::Pass::LICM: passManager->add(llvm::createLICMPass()); break;
439 case rr::Optimization::Pass::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
440 case rr::Optimization::Pass::GVN: passManager->add(llvm::createGVNPass()); break;
441 case rr::Optimization::Pass::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
442 case rr::Optimization::Pass::Reassociate: passManager->add(llvm::createReassociatePass()); break;
443 case rr::Optimization::Pass::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
444 case rr::Optimization::Pass::SCCP: passManager->add(llvm::createSCCPPass()); break;
445 case rr::Optimization::Pass::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
Ben Clayton28ae0a42019-07-10 00:50:23 +0100446 case rr::Optimization::Pass::EarlyCSEPass: passManager->add(llvm::createEarlyCSEPass()); break;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100447 default:
Ben Clayton55bc37a2019-07-04 12:17:12 +0100448 UNREACHABLE("pass: %d", int(pass));
Ben Clayton6f8e5652019-06-29 01:58:02 +0100449 }
450 }
451
452 passManager->run(*module);
453 }
454
Ben Clayton6897e9b2019-07-16 17:27:27 +0100455 std::shared_ptr<rr::Routine> acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
Ben Clayton6f8e5652019-06-29 01:58:02 +0100456 {
457 ASSERT(module);
Ben Clayton6897e9b2019-07-16 17:27:27 +0100458 return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100459 }
460
Ben Clayton55bc37a2019-07-04 12:17:12 +0100461 const rr::Config config;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100462 llvm::LLVMContext context;
463 std::unique_ptr<llvm::Module> module;
464 std::unique_ptr<llvm::IRBuilder<>> builder;
465 llvm::Function *function = nullptr;
466
467 struct CoroutineState
468 {
469 llvm::Function *await = nullptr;
470 llvm::Function *destroy = nullptr;
471 llvm::Value *handle = nullptr;
472 llvm::Value *id = nullptr;
473 llvm::Value *promise = nullptr;
Ben Clayton16da2812019-07-09 23:28:51 +0100474 llvm::Type *yieldType = nullptr;
475 llvm::BasicBlock *entryBlock = nullptr;
Ben Clayton6f8e5652019-06-29 01:58:02 +0100476 llvm::BasicBlock *suspendBlock = nullptr;
477 llvm::BasicBlock *endBlock = nullptr;
478 llvm::BasicBlock *destroyBlock = nullptr;
479 };
480 CoroutineState coroutine;
481
482#ifdef ENABLE_RR_DEBUG_INFO
483 std::unique_ptr<rr::DebugInfo> debugInfo;
484#endif
485 };
486
487 std::unique_ptr<JITBuilder> jit;
488 std::mutex codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800489
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000490#ifdef ENABLE_RR_PRINT
// Returns a copy of str in which every non-overlapping occurrence of substr
// has been replaced by replacement. Text inserted by a replacement is not
// searched again. An empty substr matches nothing, so str is returned
// unchanged (searching for an empty string would otherwise never terminate).
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
	if(substr.empty())
	{
		return str;
	}

	size_t pos = 0;
	while((pos = str.find(substr, pos)) != std::string::npos)
	{
		str.replace(pos, substr.length(), replacement);
		// Resume the search after the inserted text.
		pos += replacement.length();
	}
	return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000500#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000501
// Rounds val up to the nearest multiple of alignment and returns it.
// alignment must be non-zero, as it is used as a divisor.
template <typename T>
T alignUp(T val, T alignment)
{
	const auto wholeUnits = (val + alignment - 1) / alignment;
	return alignment * wholeUnits;
}
507
508 void* alignedAlloc(size_t size, size_t alignment)
509 {
510 ASSERT(alignment < 256);
511 auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
512 auto aligned = allocation;
513 aligned += sizeof(uint8_t); // Make space for the base-address offset.
514 aligned = reinterpret_cast<uint8_t*>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment)); // align
515 auto offset = static_cast<uint8_t>(aligned - allocation);
516 aligned[-1] = offset;
517 return aligned;
518 }
519
// Releases memory obtained from alignedAlloc(). The byte just before ptr
// holds the offset from the start of the underlying allocation, which is
// recovered and passed to delete[].
void alignedFree(void* ptr)
{
	uint8_t *const user = reinterpret_cast<uint8_t*>(ptr);
	const uint8_t distanceToBase = user[-1];
	delete[] (user - distanceToBase);
}
527
Logan Chien0eedc8c2018-08-21 09:34:28 +0800528 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
529 {
530 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
531
532 llvm::VectorType *extTy =
533 llvm::VectorType::getExtendedElementVectorType(ty);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100534 x = jit->builder->CreateZExt(x, extTy);
535 y = jit->builder->CreateZExt(y, extTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800536
537 // (x + y + 1) >> 1
538 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100539 llvm::Value *res = jit->builder->CreateAdd(x, y);
540 res = jit->builder->CreateAdd(res, one);
541 res = jit->builder->CreateLShr(res, one);
542 return jit->builder->CreateTrunc(res, ty);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800543 }
544
545 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800546 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800547 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100548 return jit->builder->CreateSelect(jit->builder->CreateICmp(pred, x, y), x, y);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800549 }
550
551 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800552 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800553 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100554 return jit->builder->CreateSExt(jit->builder->CreateICmp(pred, x, y), dstTy, "");
Logan Chien0eedc8c2018-08-21 09:34:28 +0800555 }
556
Logan Chiene3191012018-08-24 22:01:50 +0800557#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800558 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
559 {
560 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
561 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
562
563 llvm::Value *undef = llvm::UndefValue::get(srcTy);
564 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
565 std::iota(mask.begin(), mask.end(), 0);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100566 llvm::Value *v = jit->builder->CreateShuffleVector(op, undef, mask);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800567
Ben Clayton6f8e5652019-06-29 01:58:02 +0100568 return sext ? jit->builder->CreateSExt(v, dstTy)
569 : jit->builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800570 }
571
572 llvm::Value *lowerPABS(llvm::Value *v)
573 {
574 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
Ben Clayton6f8e5652019-06-29 01:58:02 +0100575 llvm::Value *cmp = jit->builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
576 llvm::Value *neg = jit->builder->CreateNeg(v);
577 return jit->builder->CreateSelect(cmp, v, neg);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800578 }
579#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800580
581#if !defined(__i386__) && !defined(__x86_64__)
582 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800583 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800584 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100585 return jit->builder->CreateSelect(jit->builder->CreateFCmp(pred, x, y), x, y);
Logan Chiene3191012018-08-24 22:01:50 +0800586 }
587
Logan Chien83fc07a2018-09-26 22:14:00 +0800588 llvm::Value *lowerRound(llvm::Value *x)
589 {
590 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
Ben Clayton6f8e5652019-06-29 01:58:02 +0100591 jit->module.get(), llvm::Intrinsic::nearbyint, {x->getType()});
592 return jit->builder->CreateCall(nearbyint, ARGS(x));
Logan Chien83fc07a2018-09-26 22:14:00 +0800593 }
594
Logan Chien2faa24a2018-09-26 19:59:32 +0800595 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
596 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100597 return jit->builder->CreateFPToSI(lowerRound(x), ty);
Logan Chien2faa24a2018-09-26 19:59:32 +0800598 }
599
Logan Chien40a60052018-09-26 19:03:53 +0800600 llvm::Value *lowerFloor(llvm::Value *x)
601 {
602 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
Ben Clayton6f8e5652019-06-29 01:58:02 +0100603 jit->module.get(), llvm::Intrinsic::floor, {x->getType()});
604 return jit->builder->CreateCall(floor, ARGS(x));
Logan Chien40a60052018-09-26 19:03:53 +0800605 }
606
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800607 llvm::Value *lowerTrunc(llvm::Value *x)
608 {
609 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
Ben Clayton6f8e5652019-06-29 01:58:02 +0100610 jit->module.get(), llvm::Intrinsic::trunc, {x->getType()});
611 return jit->builder->CreateCall(trunc, ARGS(x));
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800612 }
613
Logan Chiene3191012018-08-24 22:01:50 +0800614 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800615 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800616 {
Logan Chien28794cf2018-09-26 18:58:03 +0800617 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
618 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
619
620 unsigned numBits = ty->getScalarSizeInBits();
621
622 llvm::Value *max, *min, *extX, *extY;
623 if (isSigned)
624 {
625 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
626 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100627 extX = jit->builder->CreateSExt(x, extTy);
628 extY = jit->builder->CreateSExt(y, extTy);
Logan Chien28794cf2018-09-26 18:58:03 +0800629 }
630 else
631 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400632 ASSERT_MSG(numBits <= 64, "numBits: %d", int(numBits));
Logan Chien28794cf2018-09-26 18:58:03 +0800633 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
634 max = llvm::ConstantInt::get(extTy, maxVal, false);
635 min = llvm::ConstantInt::get(extTy, 0, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100636 extX = jit->builder->CreateZExt(x, extTy);
637 extY = jit->builder->CreateZExt(y, extTy);
Logan Chien28794cf2018-09-26 18:58:03 +0800638 }
639
Ben Clayton6f8e5652019-06-29 01:58:02 +0100640 llvm::Value *res = isAdd ? jit->builder->CreateAdd(extX, extY)
641 : jit->builder->CreateSub(extX, extY);
Logan Chien28794cf2018-09-26 18:58:03 +0800642
643 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
644 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
645
Ben Clayton6f8e5652019-06-29 01:58:02 +0100646 return jit->builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800647 }
648
649 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
650 {
Logan Chien28794cf2018-09-26 18:58:03 +0800651 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800652 }
653
654 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
655 {
Logan Chien28794cf2018-09-26 18:58:03 +0800656 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800657 }
658
659 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
660 {
Logan Chien28794cf2018-09-26 18:58:03 +0800661 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800662 }
663
664 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
665 {
Logan Chien28794cf2018-09-26 18:58:03 +0800666 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800667 }
668
669 llvm::Value *lowerSQRT(llvm::Value *x)
670 {
671 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
Ben Clayton6f8e5652019-06-29 01:58:02 +0100672 jit->module.get(), llvm::Intrinsic::sqrt, {x->getType()});
673 return jit->builder->CreateCall(sqrt, ARGS(x));
Logan Chiene3191012018-08-24 22:01:50 +0800674 }
675
676 llvm::Value *lowerRCP(llvm::Value *x)
677 {
678 llvm::Type *ty = x->getType();
679 llvm::Constant *one;
680 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
681 {
682 one = llvm::ConstantVector::getSplat(
683 vectorTy->getNumElements(),
684 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
685 }
686 else
687 {
688 one = llvm::ConstantFP::get(ty, 1);
689 }
Ben Clayton6f8e5652019-06-29 01:58:02 +0100690 return jit->builder->CreateFDiv(one, x);
Logan Chiene3191012018-08-24 22:01:50 +0800691 }
692
693 llvm::Value *lowerRSQRT(llvm::Value *x)
694 {
695 return lowerRCP(lowerSQRT(x));
696 }
697
698 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
699 {
700 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
701 llvm::Value *y = llvm::ConstantVector::getSplat(
702 ty->getNumElements(),
703 llvm::ConstantInt::get(ty->getElementType(), scalarY));
Ben Clayton6f8e5652019-06-29 01:58:02 +0100704 return jit->builder->CreateShl(x, y);
Logan Chiene3191012018-08-24 22:01:50 +0800705 }
706
707 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
708 {
709 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
710 llvm::Value *y = llvm::ConstantVector::getSplat(
711 ty->getNumElements(),
712 llvm::ConstantInt::get(ty->getElementType(), scalarY));
Ben Clayton6f8e5652019-06-29 01:58:02 +0100713 return jit->builder->CreateAShr(x, y);
Logan Chiene3191012018-08-24 22:01:50 +0800714 }
715
716 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
717 {
718 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
719 llvm::Value *y = llvm::ConstantVector::getSplat(
720 ty->getNumElements(),
721 llvm::ConstantInt::get(ty->getElementType(), scalarY));
Ben Clayton6f8e5652019-06-29 01:58:02 +0100722 return jit->builder->CreateLShr(x, y);
Logan Chiene3191012018-08-24 22:01:50 +0800723 }
724
725 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
726 {
727 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
728 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
729
Ben Clayton6f8e5652019-06-29 01:58:02 +0100730 llvm::Value *extX = jit->builder->CreateSExt(x, extTy);
731 llvm::Value *extY = jit->builder->CreateSExt(y, extTy);
732 llvm::Value *mult = jit->builder->CreateMul(extX, extY);
Logan Chiene3191012018-08-24 22:01:50 +0800733
734 llvm::Value *undef = llvm::UndefValue::get(extTy);
735
736 llvm::SmallVector<uint32_t, 16> evenIdx;
737 llvm::SmallVector<uint32_t, 16> oddIdx;
738 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
739 {
740 evenIdx.push_back(i);
741 oddIdx.push_back(i + 1);
742 }
743
Ben Clayton6f8e5652019-06-29 01:58:02 +0100744 llvm::Value *lhs = jit->builder->CreateShuffleVector(mult, undef, evenIdx);
745 llvm::Value *rhs = jit->builder->CreateShuffleVector(mult, undef, oddIdx);
746 return jit->builder->CreateAdd(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +0800747 }
748
Logan Chiene3191012018-08-24 22:01:50 +0800749 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
750 {
751 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
752 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
753
754 llvm::IntegerType *dstElemTy =
755 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
756
757 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
Ben Claytoneb50d252019-04-15 13:50:01 -0400758 ASSERT_MSG(truncNumBits < 64, "shift 64 must be handled separately. truncNumBits: %d", int(truncNumBits));
Logan Chiene3191012018-08-24 22:01:50 +0800759 llvm::Constant *max, *min;
760 if (isSigned)
761 {
762 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
763 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
764 }
765 else
766 {
767 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
768 min = llvm::ConstantInt::get(srcTy, 0, false);
769 }
770
771 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
772 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
773 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
774 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
775
Ben Clayton6f8e5652019-06-29 01:58:02 +0100776 x = jit->builder->CreateTrunc(x, dstTy);
777 y = jit->builder->CreateTrunc(y, dstTy);
Logan Chiene3191012018-08-24 22:01:50 +0800778
779 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
780 std::iota(index.begin(), index.end(), 0);
781
Ben Clayton6f8e5652019-06-29 01:58:02 +0100782 return jit->builder->CreateShuffleVector(x, y, index);
Logan Chiene3191012018-08-24 22:01:50 +0800783 }
784
785 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
786 {
787 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
788 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100789 llvm::Value *cmp = jit->builder->CreateICmpSLT(x, zero);
Logan Chiene3191012018-08-24 22:01:50 +0800790
Ben Clayton6f8e5652019-06-29 01:58:02 +0100791 llvm::Value *ret = jit->builder->CreateZExt(
792 jit->builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
Logan Chiene3191012018-08-24 22:01:50 +0800793 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
794 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100795 llvm::Value *elem = jit->builder->CreateZExt(
796 jit->builder->CreateExtractElement(cmp, i), retTy);
797 ret = jit->builder->CreateOr(ret, jit->builder->CreateShl(elem, i));
Logan Chiene3191012018-08-24 22:01:50 +0800798 }
799 return ret;
800 }
801
802 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
803 {
804 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
805 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
Ben Clayton6f8e5652019-06-29 01:58:02 +0100806 llvm::Value *cmp = jit->builder->CreateFCmpULT(x, zero);
Logan Chiene3191012018-08-24 22:01:50 +0800807
Ben Clayton6f8e5652019-06-29 01:58:02 +0100808 llvm::Value *ret = jit->builder->CreateZExt(
809 jit->builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
Logan Chiene3191012018-08-24 22:01:50 +0800810 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
811 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100812 llvm::Value *elem = jit->builder->CreateZExt(
813 jit->builder->CreateExtractElement(cmp, i), retTy);
814 ret = jit->builder->CreateOr(ret, jit->builder->CreateShl(elem, i));
Logan Chiene3191012018-08-24 22:01:50 +0800815 }
816 return ret;
817 }
818#endif // !defined(__i386__) && !defined(__x86_64__)
Chris Forbese86b6dc2019-03-01 09:08:47 -0800819
820 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
821 {
822 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
823 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
824
825 llvm::Value *extX, *extY;
826 if (sext)
827 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100828 extX = jit->builder->CreateSExt(x, extTy);
829 extY = jit->builder->CreateSExt(y, extTy);
Chris Forbese86b6dc2019-03-01 09:08:47 -0800830 }
831 else
832 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100833 extX = jit->builder->CreateZExt(x, extTy);
834 extY = jit->builder->CreateZExt(y, extTy);
Chris Forbese86b6dc2019-03-01 09:08:47 -0800835 }
836
Ben Clayton6f8e5652019-06-29 01:58:02 +0100837 llvm::Value *mult = jit->builder->CreateMul(extX, extY);
Chris Forbese86b6dc2019-03-01 09:08:47 -0800838
839 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
Ben Clayton6f8e5652019-06-29 01:58:02 +0100840 llvm::Value *mulh = jit->builder->CreateAShr(mult, intTy->getBitWidth());
841 return jit->builder->CreateTrunc(mulh, ty);
Chris Forbese86b6dc2019-03-01 09:08:47 -0800842 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400843}
844
Nicolas Capens48461502018-08-06 14:20:45 -0400845namespace rr
John Bauman89401822014-05-06 15:04:28 -0400846{
Ben Claytonc7904162019-04-17 17:35:48 -0400847 const Capabilities Caps =
848 {
849 true, // CallSupported
Ben Clayton1c82c7b2019-04-30 12:49:27 +0100850 true, // CoroutinesSupported
Ben Claytonc7904162019-04-17 17:35:48 -0400851 };
852
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400853 static std::memory_order atomicOrdering(llvm::AtomicOrdering memoryOrder)
854 {
855 switch(memoryOrder)
856 {
857 case llvm::AtomicOrdering::Monotonic: return std::memory_order_relaxed; // https://llvm.org/docs/Atomics.html#monotonic
858 case llvm::AtomicOrdering::Acquire: return std::memory_order_acquire;
859 case llvm::AtomicOrdering::Release: return std::memory_order_release;
860 case llvm::AtomicOrdering::AcquireRelease: return std::memory_order_acq_rel;
861 case llvm::AtomicOrdering::SequentiallyConsistent: return std::memory_order_seq_cst;
862 default:
Ben Claytonfb280672019-04-25 11:16:15 +0100863 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400864 return std::memory_order_acq_rel;
865 }
866 }
867
868 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
869 {
870 if(!atomic)
871 {
872 return llvm::AtomicOrdering::NotAtomic;
873 }
874
875 switch(memoryOrder)
876 {
877 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
878 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
879 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
880 case std::memory_order_release: return llvm::AtomicOrdering::Release;
881 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
882 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
883 default:
884 UNREACHABLE("memoryOrder: %d", int(memoryOrder));
885 return llvm::AtomicOrdering::AcquireRelease;
886 }
887 }
888
889 template <typename T>
890 static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
891 {
892 *reinterpret_cast<T*>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), atomicOrdering(ordering));
893 }
894
895 template <typename T>
896 static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
897 {
898 std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T>*>(ptr), *reinterpret_cast<T*>(val), atomicOrdering(ordering));
899 }
900
Chris Forbesfd4c96d2019-06-20 11:20:42 -0700901#ifdef __ANDROID__
// Applies an arbitrary read-modify-write operation to *ptr using a
// compare-and-swap retry loop: reads the current value, computes
// f(current, val), and retries until the swap succeeds.
// Returns the value *ptr held immediately before the successful swap.
template<typename F>
static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
{
    while (true)
    {
        const uint32_t observed = *ptr;
        const uint32_t updated = f(observed, val);

        // The swap only takes effect if *ptr still holds the observed value.
        const uint32_t previous = __sync_val_compare_and_swap_4(ptr, observed, updated);
        if (previous == observed)
        {
            return observed;
        }
    }
}
915#endif
916
Ben Clayton6f8e5652019-06-29 01:58:02 +0100917 void* resolveExternalSymbol(const char* name)
Logan Chien40a60052018-09-26 19:03:53 +0800918 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100919 struct Atomic
Logan Chien40a60052018-09-26 19:03:53 +0800920 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100921 static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400922 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100923 switch (size)
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400924 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100925 case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
926 case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
927 case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
928 case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
929 default:
930 UNIMPLEMENTED("Atomic::load(size: %d)", int(size));
Ben Clayton4d1f8d02019-04-17 23:47:35 -0400931 }
Ben Clayton6f8e5652019-06-29 01:58:02 +0100932 }
933 static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
Chris Forbes9283b252019-06-17 09:44:28 -0700934 {
Ben Clayton6f8e5652019-06-29 01:58:02 +0100935 switch (size)
936 {
937 case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
938 case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
939 case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
940 case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
941 default:
942 UNIMPLEMENTED("Atomic::store(size: %d)", int(size));
943 }
944 }
945 };
Chris Forbes9283b252019-06-17 09:44:28 -0700946
Ben Clayton6f8e5652019-06-29 01:58:02 +0100947 struct F
948 {
949 static void nop() {}
950 static void neverCalled() { UNREACHABLE("Should never be called"); }
951
952 static void* coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
953 static void coroutine_free_frame(void* ptr) { alignedFree(ptr); }
Ben Clayton40a885e2019-06-23 19:12:48 +0100954
Chris Forbes9283b252019-06-17 09:44:28 -0700955#ifdef __ANDROID__
Ben Clayton6f8e5652019-06-29 01:58:02 +0100956 // forwarders since we can't take address of builtins
957 static void sync_synchronize() { __sync_synchronize(); }
958 static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
959 static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
960 static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
961 static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
962 static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
963 static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
964 static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }
Chris Forbesfd4c96d2019-06-20 11:20:42 -0700965
Ben Clayton6f8e5652019-06-29 01:58:02 +0100966 static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val) { return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a,b);}); }
967 static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val) { return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a,b);}); }
968 static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val) { return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a,b);}); }
969 static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val) { return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a,b);}); }
Chris Forbes9283b252019-06-17 09:44:28 -0700970#endif
Ben Clayton6f8e5652019-06-29 01:58:02 +0100971 };
Ben Claytonac07ed82019-03-26 14:17:41 +0000972
Ben Clayton6f8e5652019-06-29 01:58:02 +0100973 class Resolver
974 {
975 public:
976 using FunctionMap = std::unordered_map<std::string, void *>;
Ben Claytonb5f1a132019-06-24 11:00:53 +0100977
Ben Clayton6f8e5652019-06-29 01:58:02 +0100978 FunctionMap functions;
Ben Claytonb5f1a132019-06-24 11:00:53 +0100979
Ben Clayton6f8e5652019-06-29 01:58:02 +0100980 Resolver()
981 {
982 functions.emplace("nop", reinterpret_cast<void*>(F::nop));
983 functions.emplace("floorf", reinterpret_cast<void*>(floorf));
984 functions.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
985 functions.emplace("truncf", reinterpret_cast<void*>(truncf));
986 functions.emplace("printf", reinterpret_cast<void*>(printf));
987 functions.emplace("puts", reinterpret_cast<void*>(puts));
988 functions.emplace("fmodf", reinterpret_cast<void*>(fmodf));
Ben Claytonb5f1a132019-06-24 11:00:53 +0100989
Ben Clayton6f8e5652019-06-29 01:58:02 +0100990 functions.emplace("sinf", reinterpret_cast<void*>(sinf));
991 functions.emplace("cosf", reinterpret_cast<void*>(cosf));
992 functions.emplace("asinf", reinterpret_cast<void*>(asinf));
993 functions.emplace("acosf", reinterpret_cast<void*>(acosf));
994 functions.emplace("atanf", reinterpret_cast<void*>(atanf));
995 functions.emplace("sinhf", reinterpret_cast<void*>(sinhf));
996 functions.emplace("coshf", reinterpret_cast<void*>(coshf));
997 functions.emplace("tanhf", reinterpret_cast<void*>(tanhf));
998 functions.emplace("asinhf", reinterpret_cast<void*>(asinhf));
999 functions.emplace("acoshf", reinterpret_cast<void*>(acoshf));
1000 functions.emplace("atanhf", reinterpret_cast<void*>(atanhf));
1001 functions.emplace("atan2f", reinterpret_cast<void*>(atan2f));
1002 functions.emplace("powf", reinterpret_cast<void*>(powf));
1003 functions.emplace("expf", reinterpret_cast<void*>(expf));
1004 functions.emplace("logf", reinterpret_cast<void*>(logf));
1005 functions.emplace("exp2f", reinterpret_cast<void*>(exp2f));
1006 functions.emplace("log2f", reinterpret_cast<void*>(log2f));
Ben Clayton14740062019-04-09 13:48:41 -04001007
Ben Clayton6f8e5652019-06-29 01:58:02 +01001008 functions.emplace("sin", reinterpret_cast<void*>(static_cast<double(*)(double)>(sin)));
1009 functions.emplace("cos", reinterpret_cast<void*>(static_cast<double(*)(double)>(cos)));
1010 functions.emplace("asin", reinterpret_cast<void*>(static_cast<double(*)(double)>(asin)));
1011 functions.emplace("acos", reinterpret_cast<void*>(static_cast<double(*)(double)>(acos)));
1012 functions.emplace("atan", reinterpret_cast<void*>(static_cast<double(*)(double)>(atan)));
1013 functions.emplace("sinh", reinterpret_cast<void*>(static_cast<double(*)(double)>(sinh)));
1014 functions.emplace("cosh", reinterpret_cast<void*>(static_cast<double(*)(double)>(cosh)));
1015 functions.emplace("tanh", reinterpret_cast<void*>(static_cast<double(*)(double)>(tanh)));
1016 functions.emplace("asinh", reinterpret_cast<void*>(static_cast<double(*)(double)>(asinh)));
1017 functions.emplace("acosh", reinterpret_cast<void*>(static_cast<double(*)(double)>(acosh)));
1018 functions.emplace("atanh", reinterpret_cast<void*>(static_cast<double(*)(double)>(atanh)));
1019 functions.emplace("atan2", reinterpret_cast<void*>(static_cast<double(*)(double,double)>(atan2)));
1020 functions.emplace("pow", reinterpret_cast<void*>(static_cast<double(*)(double,double)>(pow)));
1021 functions.emplace("exp", reinterpret_cast<void*>(static_cast<double(*)(double)>(exp)));
1022 functions.emplace("log", reinterpret_cast<void*>(static_cast<double(*)(double)>(log)));
1023 functions.emplace("exp2", reinterpret_cast<void*>(static_cast<double(*)(double)>(exp2)));
1024 functions.emplace("log2", reinterpret_cast<void*>(static_cast<double(*)(double)>(log2)));
1025
1026 functions.emplace("atomic_load", reinterpret_cast<void*>(Atomic::load));
1027 functions.emplace("atomic_store", reinterpret_cast<void*>(Atomic::store));
1028
1029 // FIXME (b/119409619): use an allocator here so we can control all memory allocations
1030 functions.emplace("coroutine_alloc_frame", reinterpret_cast<void*>(F::coroutine_alloc_frame));
1031 functions.emplace("coroutine_free_frame", reinterpret_cast<void*>(F::coroutine_free_frame));
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001032
Ben Clayton14740062019-04-09 13:48:41 -04001033#ifdef __APPLE__
Ben Clayton6f8e5652019-06-29 01:58:02 +01001034 functions.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
Ben Clayton14740062019-04-09 13:48:41 -04001035#elif defined(__linux__)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001036 functions.emplace("sincosf", reinterpret_cast<void*>(sincosf));
Ben Clayton2f58df32019-06-23 21:29:25 +01001037#elif defined(_WIN64)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001038 functions.emplace("chkstk", reinterpret_cast<void*>(__chkstk));
Ben Clayton2f58df32019-06-23 21:29:25 +01001039#elif defined(_WIN32)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001040 functions.emplace("chkstk", reinterpret_cast<void*>(_chkstk));
Ben Clayton2f58df32019-06-23 21:29:25 +01001041#endif
Chris Forbes9283b252019-06-17 09:44:28 -07001042
1043#ifdef __ANDROID__
Ben Clayton6f8e5652019-06-29 01:58:02 +01001044 functions.emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void*>(F::neverCalled));
1045 functions.emplace("sync_synchronize", reinterpret_cast<void*>(F::sync_synchronize));
1046 functions.emplace("sync_fetch_and_add_4", reinterpret_cast<void*>(F::sync_fetch_and_add_4));
1047 functions.emplace("sync_fetch_and_and_4", reinterpret_cast<void*>(F::sync_fetch_and_and_4));
1048 functions.emplace("sync_fetch_and_or_4", reinterpret_cast<void*>(F::sync_fetch_and_or_4));
1049 functions.emplace("sync_fetch_and_xor_4", reinterpret_cast<void*>(F::sync_fetch_and_xor_4));
1050 functions.emplace("sync_fetch_and_sub_4", reinterpret_cast<void*>(F::sync_fetch_and_sub_4));
1051 functions.emplace("sync_lock_test_and_set_4", reinterpret_cast<void*>(F::sync_lock_test_and_set_4));
1052 functions.emplace("sync_val_compare_and_swap_4", reinterpret_cast<void*>(F::sync_val_compare_and_swap_4));
1053 functions.emplace("sync_fetch_and_max_4", reinterpret_cast<void*>(F::sync_fetch_and_max_4));
1054 functions.emplace("sync_fetch_and_min_4", reinterpret_cast<void*>(F::sync_fetch_and_min_4));
1055 functions.emplace("sync_fetch_and_umax_4", reinterpret_cast<void*>(F::sync_fetch_and_umax_4));
1056 functions.emplace("sync_fetch_and_umin_4", reinterpret_cast<void*>(F::sync_fetch_and_umin_4));
1057 #endif
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001058 }
Ben Clayton6f8e5652019-06-29 01:58:02 +01001059 };
Ben Clayton1c82c7b2019-04-30 12:49:27 +01001060
Ben Clayton6f8e5652019-06-29 01:58:02 +01001061 static Resolver resolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +08001062
Ben Clayton6f8e5652019-06-29 01:58:02 +01001063 // Trim off any underscores from the start of the symbol. LLVM likes
1064 // to append these on macOS.
1065 const char* trimmed = name;
1066 while (trimmed[0] == '_') { trimmed++; }
Ben Claytoncee3dff2019-05-22 12:01:22 +01001067
Ben Clayton6f8e5652019-06-29 01:58:02 +01001068 auto it = resolver.functions.find(trimmed);
1069 // Missing functions will likely make the module fail in exciting non-obvious ways.
1070 ASSERT_MSG(it != resolver.functions.end(), "Missing external function: '%s'", name);
1071 return it->second;
1072 }
Logan Chien52cde602018-09-03 19:37:57 +08001073
// Abstract Type* values are normally LLVM type pointers. The exception is
// 64-bit (and narrower) vectors, which are emulated with 128-bit vectors to
// avoid MMX on x86 and VFP on ARM, and to avoid the cost of converting them
// to explicit 128-bit ones. Because real LLVM types are pointers, each
// emulated type can be encoded as an abstract pointer holding one of the
// small enum values below.
enum InternalType : uintptr_t
{
    // Emulated types. These must stay below EmulatedTypeCount.
    Type_v2i32,
    Type_v4i16,
    Type_v2i16,
    Type_v8i8,
    Type_v4i8,
    Type_v2f32,
    EmulatedTypeCount,

    // Result of asInternalType() for an abstract Type* that is to be
    // interpreted as a real LLVM type pointer.
    Type_LLVM
};
1093
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001094 inline InternalType asInternalType(Type *type)
1095 {
1096 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
1097 return (t < EmulatedTypeCount) ? t : Type_LLVM;
1098 }
1099
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001100 llvm::Type *T(Type *t)
1101 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001102 // Use 128-bit vectors to implement logically shorter ones.
1103 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001104 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001105 case Type_v2i32: return T(Int4::getType());
1106 case Type_v4i16: return T(Short8::getType());
1107 case Type_v2i16: return T(Short8::getType());
1108 case Type_v8i8: return T(Byte16::getType());
1109 case Type_v4i8: return T(Byte16::getType());
1110 case Type_v2f32: return T(Float4::getType());
1111 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
Ben Claytoneb50d252019-04-15 13:50:01 -04001112 default:
1113 UNREACHABLE("asInternalType(t): %d", int(asInternalType(t)));
1114 return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001115 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001116 }
1117
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001118 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001119 {
1120 return reinterpret_cast<Type*>(t);
1121 }
1122
Nicolas Capensac230122016-09-20 14:30:06 -04001123 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
1124 {
1125 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
1126 }
1127
Logan Chien191b3052018-08-31 16:57:15 +08001128 inline llvm::BasicBlock *B(BasicBlock *t)
1129 {
1130 return reinterpret_cast<llvm::BasicBlock*>(t);
1131 }
1132
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001133 inline BasicBlock *B(llvm::BasicBlock *t)
1134 {
1135 return reinterpret_cast<BasicBlock*>(t);
1136 }
1137
Nicolas Capens01a97962017-07-28 17:30:51 -04001138 static size_t typeSize(Type *type)
1139 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001140 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001141 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001142 case Type_v2i32: return 8;
1143 case Type_v4i16: return 8;
1144 case Type_v2i16: return 4;
1145 case Type_v8i8: return 8;
1146 case Type_v4i8: return 4;
1147 case Type_v2f32: return 8;
1148 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -04001149 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001150 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001151
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001152 if(t->isPointerTy())
1153 {
1154 return sizeof(void*);
1155 }
1156
1157 // At this point we should only have LLVM 'primitive' types.
1158 unsigned int bits = t->getPrimitiveSizeInBits();
Ben Claytoneb50d252019-04-15 13:50:01 -04001159 ASSERT_MSG(bits != 0, "bits: %d", int(bits));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001160
1161 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
1162 // but are typically stored as one byte. The DataLayout structure should
1163 // be used here and many other places if this assumption fails.
1164 return (bits + 7) / 8;
1165 }
1166 break;
1167 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001168 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001169 return 0;
1170 }
Nicolas Capens01a97962017-07-28 17:30:51 -04001171 }
1172
Nicolas Capens69674fb2017-09-01 11:08:44 -04001173 static unsigned int elementCount(Type *type)
1174 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001175 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -04001176 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001177 case Type_v2i32: return 2;
1178 case Type_v4i16: return 4;
1179 case Type_v2i16: return 2;
1180 case Type_v8i8: return 8;
1181 case Type_v4i8: return 4;
1182 case Type_v2f32: return 2;
1183 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
Ben Claytoneb50d252019-04-15 13:50:01 -04001184 default:
1185 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1186 return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -04001187 }
Nicolas Capens69674fb2017-09-01 11:08:44 -04001188 }
1189
Ben Claytonea38f952019-06-17 13:56:56 +01001190 static ::llvm::Function* createFunction(const char *name, ::llvm::Type *retTy, const std::vector<::llvm::Type*> &params)
1191 {
1192 llvm::FunctionType *functionType = llvm::FunctionType::get(retTy, params, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001193 auto func = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, name, jit->module.get());
Ben Claytonea38f952019-06-17 13:56:56 +01001194 func->setDoesNotThrow();
1195 func->setCallingConv(llvm::CallingConv::C);
Ben Claytonea38f952019-06-17 13:56:56 +01001196 return func;
1197 }
1198
John Bauman89401822014-05-06 15:04:28 -04001199 Nucleus::Nucleus()
1200 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -04001201 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -04001202
Ben Clayton6f8e5652019-06-29 01:58:02 +01001203 ASSERT(jit == nullptr);
Ben Clayton55bc37a2019-07-04 12:17:12 +01001204 jit.reset(new JITBuilder(Nucleus::getDefaultConfig()));
John Bauman89401822014-05-06 15:04:28 -04001205 }
1206
1207 Nucleus::~Nucleus()
1208 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001209 jit.reset();
Nicolas Capens3bbc5e12016-09-27 10:49:52 -04001210 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -04001211 }
1212
Ben Clayton55bc37a2019-07-04 12:17:12 +01001213 void Nucleus::setDefaultConfig(const Config &cfg)
John Bauman89401822014-05-06 15:04:28 -04001214 {
Ben Clayton55bc37a2019-07-04 12:17:12 +01001215 std::unique_lock<std::mutex> lock(::defaultConfigLock);
1216 ::defaultConfig() = cfg;
1217 }
1218
1219 void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
1220 {
1221 std::unique_lock<std::mutex> lock(::defaultConfigLock);
1222 auto &config = ::defaultConfig();
1223 config = cfgEdit.apply(config);
1224 }
1225
1226 Config Nucleus::getDefaultConfig()
1227 {
1228 std::unique_lock<std::mutex> lock(::defaultConfigLock);
1229 return ::defaultConfig();
1230 }
1231
Ben Clayton6897e9b2019-07-16 17:27:27 +01001232 std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
Ben Clayton55bc37a2019-07-04 12:17:12 +01001233 {
1234 auto cfg = cfgEdit.apply(jit->config);
1235
Ben Clayton6f8e5652019-06-29 01:58:02 +01001236 if(jit->builder->GetInsertBlock()->empty() || !jit->builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -04001237 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001238 llvm::Type *type = jit->function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -04001239
1240 if(type->isVoidTy())
1241 {
1242 createRetVoid();
1243 }
1244 else
1245 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001246 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -04001247 }
1248 }
John Bauman89401822014-05-06 15:04:28 -04001249
Ben Clayton97c13ad2019-05-02 11:59:30 +01001250#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton6f8e5652019-06-29 01:58:02 +01001251 if (jit->debugInfo != nullptr)
Ben Clayton97c13ad2019-05-02 11:59:30 +01001252 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001253 jit->debugInfo->Finalize();
Ben Clayton97c13ad2019-05-02 11:59:30 +01001254 }
1255#endif // ENABLE_RR_DEBUG_INFO
1256
John Bauman89401822014-05-06 15:04:28 -04001257 if(false)
1258 {
Ben Clayton5875be52019-04-11 14:57:40 -04001259 std::error_code error;
1260 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001261 jit->module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001262 }
1263
Ben Clayton5375f472019-06-24 13:33:11 +01001264#if defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
Ben Clayton4b944652019-05-02 10:56:19 +01001265 {
1266 llvm::legacy::PassManager pm;
1267 pm.add(llvm::createVerifierPass());
Ben Clayton6f8e5652019-06-29 01:58:02 +01001268 pm.run(*jit->module);
Ben Clayton4b944652019-05-02 10:56:19 +01001269 }
Ben Clayton5375f472019-06-24 13:33:11 +01001270#endif // defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
Ben Clayton4b944652019-05-02 10:56:19 +01001271
Ben Clayton55bc37a2019-07-04 12:17:12 +01001272 jit->optimize(cfg);
John Bauman89401822014-05-06 15:04:28 -04001273
1274 if(false)
1275 {
Ben Clayton5875be52019-04-11 14:57:40 -04001276 std::error_code error;
1277 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001278 jit->module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001279 }
1280
Ben Clayton55bc37a2019-07-04 12:17:12 +01001281 auto routine = jit->acquireRoutine(&jit->function, 1, cfg);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001282 jit.reset();
John Bauman89401822014-05-06 15:04:28 -04001283
John Bauman89401822014-05-06 15:04:28 -04001284 return routine;
1285 }
1286
John Bauman19bac1e2014-05-06 15:23:49 -04001287 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -04001288 {
1289 // Need to allocate it in the entry block for mem2reg to work
Ben Clayton6f8e5652019-06-29 01:58:02 +01001290 llvm::BasicBlock &entryBlock = jit->function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -04001291
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001292 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -04001293
1294 if(arraySize)
1295 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001296 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
John Bauman89401822014-05-06 15:04:28 -04001297 }
1298 else
1299 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08001300 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
John Bauman89401822014-05-06 15:04:28 -04001301 }
1302
1303 entryBlock.getInstList().push_front(declaration);
1304
Nicolas Capens19336542016-09-26 10:32:29 -04001305 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -04001306 }
1307
1308 BasicBlock *Nucleus::createBasicBlock()
1309 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001310 return B(llvm::BasicBlock::Create(jit->context, "", jit->function));
John Bauman89401822014-05-06 15:04:28 -04001311 }
1312
1313 BasicBlock *Nucleus::getInsertBlock()
1314 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001315 return B(jit->builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -04001316 }
1317
1318 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1319 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001320 // assert(jit->builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -04001321
1322 Variable::materializeAll();
1323
Ben Clayton6f8e5652019-06-29 01:58:02 +01001324 jit->builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -04001325 }
1326
Nicolas Capensac230122016-09-20 14:30:06 -04001327 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -04001328 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001329 jit->function = rr::createFunction("", T(ReturnType), T(Params));
Nicolas Capens52551d12018-09-13 14:30:56 -04001330
Ben Claytonac07ed82019-03-26 14:17:41 +00001331#ifdef ENABLE_RR_DEBUG_INFO
Ben Claytone031f362019-07-20 12:35:40 +01001332 jit->debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(jit->builder.get(), &jit->context, jit->module.get(), jit->function));
Ben Claytonac07ed82019-03-26 14:17:41 +00001333#endif // ENABLE_RR_DEBUG_INFO
1334
Ben Clayton6f8e5652019-06-29 01:58:02 +01001335 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->function));
John Bauman89401822014-05-06 15:04:28 -04001336 }
1337
Nicolas Capens19336542016-09-26 10:32:29 -04001338 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -04001339 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001340 llvm::Function::arg_iterator args = jit->function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -04001341
1342 while(index)
1343 {
1344 args++;
1345 index--;
1346 }
1347
Nicolas Capens19336542016-09-26 10:32:29 -04001348 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001349 }
1350
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001351 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001352 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001353 RR_DEBUG_INFO_UPDATE_LOC();
1354
Ben Clayton6f8e5652019-06-29 01:58:02 +01001355 ASSERT_MSG(jit->function->getReturnType() == T(Void::getType()), "Return type mismatch");
Ben Claytonc958b172019-05-02 12:20:59 +01001356
Nicolas Capens0192d152019-03-27 14:46:07 -04001357 // Code generated after this point is unreachable, so any variables
1358 // being read can safely return an undefined value. We have to avoid
1359 // materializing variables after the terminator ret instruction.
1360 Variable::killUnmaterialized();
1361
Ben Clayton6f8e5652019-06-29 01:58:02 +01001362 jit->builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001363 }
1364
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001365 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001366 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001367 RR_DEBUG_INFO_UPDATE_LOC();
1368
Ben Clayton6f8e5652019-06-29 01:58:02 +01001369 ASSERT_MSG(jit->function->getReturnType() == V(v)->getType(), "Return type mismatch");
Ben Claytonc958b172019-05-02 12:20:59 +01001370
Nicolas Capens0192d152019-03-27 14:46:07 -04001371 // Code generated after this point is unreachable, so any variables
1372 // being read can safely return an undefined value. We have to avoid
1373 // materializing variables after the terminator ret instruction.
1374 Variable::killUnmaterialized();
1375
Ben Clayton6f8e5652019-06-29 01:58:02 +01001376 jit->builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001377 }
1378
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001379 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001380 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001381 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001382 Variable::materializeAll();
1383
Ben Clayton6f8e5652019-06-29 01:58:02 +01001384 jit->builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001385 }
1386
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001387 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001388 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001389 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens0192d152019-03-27 14:46:07 -04001390 Variable::materializeAll();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001391 jit->builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001392 }
1393
1394 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1395 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001396 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001397 return V(jit->builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001398 }
1399
1400 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1401 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001402 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001403 return V(jit->builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001404 }
1405
1406 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1407 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001408 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001409 return V(jit->builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001410 }
1411
1412 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1413 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001414 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001415 return V(jit->builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001416 }
1417
1418 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1419 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001420 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001421 return V(jit->builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001422 }
1423
1424 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1425 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001426 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001427 return V(jit->builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001428 }
1429
1430 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1431 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001432 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001433 return V(jit->builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001434 }
1435
1436 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1437 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001438 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001439 return V(jit->builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001440 }
1441
1442 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1443 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001444 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001445 return V(jit->builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001446 }
1447
1448 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1449 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001450 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001451 return V(jit->builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001452 }
1453
1454 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1455 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001456 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001457 return V(jit->builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001458 }
1459
1460 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1461 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001462 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001463 return V(jit->builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001464 }
1465
1466 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1467 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001468 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001469 return V(jit->builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001470 }
1471
1472 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1473 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001474 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001475 return V(jit->builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001476 }
1477
1478 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1479 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001480 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001481 return V(jit->builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001482 }
1483
1484 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1485 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001486 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001487 return V(jit->builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001488 }
1489
1490 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1491 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001492 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001493 return V(jit->builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001494 }
1495
1496 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1497 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001498 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001499 return V(jit->builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001500 }
1501
Nicolas Capens19336542016-09-26 10:32:29 -04001502 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001503 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001504 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001505 return V(jit->builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001506 }
1507
Nicolas Capens19336542016-09-26 10:32:29 -04001508 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001509 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001510 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001511 return V(jit->builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001512 }
1513
Nicolas Capens19336542016-09-26 10:32:29 -04001514 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001515 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001516 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001517 return V(jit->builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001518 }
1519
Nicolas Capens86509d92019-03-21 13:23:50 -04001520 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001521 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001522 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001523 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001524 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001525 case Type_v2i32:
1526 case Type_v4i16:
1527 case Type_v8i8:
1528 case Type_v2f32:
1529 return createBitCast(
1530 createInsertElement(
1531 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001532 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001533 0),
1534 type);
1535 case Type_v2i16:
1536 case Type_v4i8:
1537 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001538 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001539 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001540 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001541 i = createZExt(i, Long::getType());
1542 Value *v = createInsertElement(u, i, 0);
1543 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001544 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001545 // Fallthrough to non-emulated case.
1546 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001547 {
Ben Clayton99e57192019-05-03 13:25:08 +01001548 auto elTy = T(type);
1549 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001550
1551 if (!atomic)
Ben Clayton99e57192019-05-03 13:25:08 +01001552 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001553 return V(jit->builder->CreateAlignedLoad(V(ptr), alignment, isVolatile));
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001554 }
1555 else if (elTy->isIntegerTy() || elTy->isPointerTy())
1556 {
1557 // Integers and pointers can be atomically loaded by setting
1558 // the ordering constraint on the load instruction.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001559 auto load = jit->builder->CreateAlignedLoad(V(ptr), alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001560 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1561 return V(load);
1562 }
1563 else if (elTy->isFloatTy() || elTy->isDoubleTy())
1564 {
1565 // LLVM claims to support atomic loads of float types as
1566 // above, but certain backends cannot deal with this.
1567 // Load as an integer and bitcast. See b/136037244.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001568 auto size = jit->module->getDataLayout().getTypeStoreSize(elTy);
1569 auto elAsIntTy = ::llvm::IntegerType::get(jit->context, size * 8);
1570 auto ptrCast = jit->builder->CreatePointerCast(V(ptr), elAsIntTy->getPointerTo());
1571 auto load = jit->builder->CreateAlignedLoad(ptrCast, alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001572 load->setAtomic(atomicOrdering(atomic, memoryOrder));
Ben Clayton6f8e5652019-06-29 01:58:02 +01001573 auto loadCast = jit->builder->CreateBitCast(load, elTy);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001574 return V(loadCast);
1575 }
1576 else
1577 {
1578 // More exotic types require falling back to the extern:
1579 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001580 auto sizetTy = ::llvm::IntegerType::get(jit->context, sizeof(size_t) * 8);
1581 auto intTy = ::llvm::IntegerType::get(jit->context, sizeof(int) * 8);
1582 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton99e57192019-05-03 13:25:08 +01001583 auto i8PtrTy = i8Ty->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001584 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
Ben Clayton99e57192019-05-03 13:25:08 +01001585 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001586 auto func = jit->module->getOrInsertFunction("__atomic_load", funcTy);
1587 auto size = jit->module->getDataLayout().getTypeStoreSize(elTy);
Ben Clayton99e57192019-05-03 13:25:08 +01001588 auto out = allocateStackVariable(type);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001589 jit->builder->CreateCall(func, {
Ben Clayton99e57192019-05-03 13:25:08 +01001590 ::llvm::ConstantInt::get(sizetTy, size),
Ben Clayton6f8e5652019-06-29 01:58:02 +01001591 jit->builder->CreatePointerCast(V(ptr), i8PtrTy),
1592 jit->builder->CreatePointerCast(V(out), i8PtrTy),
Ben Clayton99e57192019-05-03 13:25:08 +01001593 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1594 });
Ben Clayton6f8e5652019-06-29 01:58:02 +01001595 return V(jit->builder->CreateLoad(V(out)));
Ben Clayton99e57192019-05-03 13:25:08 +01001596 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001597 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001598 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001599 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1600 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001601 }
John Bauman89401822014-05-06 15:04:28 -04001602 }
1603
Nicolas Capens86509d92019-03-21 13:23:50 -04001604 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001605 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001606 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001607 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001608 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001609 case Type_v2i32:
1610 case Type_v4i16:
1611 case Type_v8i8:
1612 case Type_v2f32:
1613 createStore(
1614 createExtractElement(
1615 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1616 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001617 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001618 return value;
1619 case Type_v2i16:
1620 case Type_v4i8:
1621 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001622 {
Logan Chien191b3052018-08-31 16:57:15 +08001623 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001624 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1625 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001626 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001627 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001628 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001629 // Fallthrough to non-emulated case.
1630 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001631 {
Ben Clayton99e57192019-05-03 13:25:08 +01001632 auto elTy = T(type);
1633 ASSERT(V(ptr)->getType()->getContainedType(0) == elTy);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001634
1635 if (!atomic)
Ben Clayton99e57192019-05-03 13:25:08 +01001636 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001637 jit->builder->CreateAlignedStore(V(value), V(ptr), alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001638 }
1639 else if (elTy->isIntegerTy() || elTy->isPointerTy())
1640 {
1641 // Integers and pointers can be atomically stored by setting
1642 // the ordering constraint on the store instruction.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001643 auto store = jit->builder->CreateAlignedStore(V(value), V(ptr), alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001644 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1645 }
1646 else if (elTy->isFloatTy() || elTy->isDoubleTy())
1647 {
1648 // LLVM claims to support atomic stores of float types as
1649 // above, but certain backends cannot deal with this.
1650 // Store as an bitcast integer. See b/136037244.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001651 auto size = jit->module->getDataLayout().getTypeStoreSize(elTy);
1652 auto elAsIntTy = ::llvm::IntegerType::get(jit->context, size * 8);
1653 auto valCast = jit->builder->CreateBitCast(V(value), elAsIntTy);
1654 auto ptrCast = jit->builder->CreatePointerCast(V(ptr), elAsIntTy->getPointerTo());
1655 auto store = jit->builder->CreateAlignedStore(valCast, ptrCast, alignment, isVolatile);
Ben Clayton2fa1dad2019-06-26 14:07:05 +01001656 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1657 }
1658 else
1659 {
1660 // More exotic types require falling back to the extern:
1661 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
Ben Clayton6f8e5652019-06-29 01:58:02 +01001662 auto sizetTy = ::llvm::IntegerType::get(jit->context, sizeof(size_t) * 8);
1663 auto intTy = ::llvm::IntegerType::get(jit->context, sizeof(int) * 8);
1664 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton99e57192019-05-03 13:25:08 +01001665 auto i8PtrTy = i8Ty->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001666 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
Ben Clayton99e57192019-05-03 13:25:08 +01001667 auto funcTy = ::llvm::FunctionType::get(voidTy, {sizetTy, i8PtrTy, i8PtrTy, intTy}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001668 auto func = jit->module->getOrInsertFunction("__atomic_store", funcTy);
1669 auto size = jit->module->getDataLayout().getTypeStoreSize(elTy);
Ben Clayton99e57192019-05-03 13:25:08 +01001670 auto copy = allocateStackVariable(type);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001671 jit->builder->CreateStore(V(value), V(copy));
1672 jit->builder->CreateCall(func, {
Ben Clayton99e57192019-05-03 13:25:08 +01001673 ::llvm::ConstantInt::get(sizetTy, size),
Ben Clayton6f8e5652019-06-29 01:58:02 +01001674 jit->builder->CreatePointerCast(V(ptr), i8PtrTy),
1675 jit->builder->CreatePointerCast(V(copy), i8PtrTy),
Ben Clayton99e57192019-05-03 13:25:08 +01001676 ::llvm::ConstantInt::get(intTy, uint64_t(atomicOrdering(true, memoryOrder))),
1677 });
1678 }
Nicolas Capens86509d92019-03-21 13:23:50 -04001679
1680 return value;
1681 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001682 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001683 UNREACHABLE("asInternalType(type): %d", int(asInternalType(type)));
1684 return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001685 }
John Bauman89401822014-05-06 15:04:28 -04001686 }
1687
Ben Clayton0b00b952019-07-03 15:51:19 +01001688 Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001689 {
1690 ASSERT(V(ptr)->getType()->isPointerTy());
1691 ASSERT(V(mask)->getType()->isVectorTy());
1692
1693 auto numEls = V(mask)->getType()->getVectorNumElements();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001694 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
1695 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001696 auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
1697 auto elVecPtrTy = elVecTy->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001698 auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
Ben Clayton0b00b952019-07-03 15:51:19 +01001699 auto passthrough = zeroMaskedLanes ? ::llvm::Constant::getNullValue(elVecTy) : llvm::UndefValue::get(elVecTy);
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001700 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001701 auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_load, { elVecTy, elVecPtrTy } );
1702 return V(jit->builder->CreateCall(func, { V(ptr), align, i8Mask, passthrough }));
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001703 }
1704
1705 void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
1706 {
1707 ASSERT(V(ptr)->getType()->isPointerTy());
1708 ASSERT(V(val)->getType()->isVectorTy());
1709 ASSERT(V(mask)->getType()->isVectorTy());
1710
1711 auto numEls = V(mask)->getType()->getVectorNumElements();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001712 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
1713 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001714 auto elVecTy = V(val)->getType();
1715 auto elVecPtrTy = elVecTy->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001716 auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001717 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001718 auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_store, { elVecTy, elVecPtrTy } );
1719 jit->builder->CreateCall(func, { V(val), V(ptr), align, i8Mask });
Ben Claytoncb2ebc92019-06-20 00:18:03 +01001720 }
1721
Ben Clayton0b00b952019-07-03 15:51:19 +01001722 Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
Ben Clayton0fc611f2019-04-18 11:23:27 -04001723 {
1724 ASSERT(V(base)->getType()->isPointerTy());
1725 ASSERT(V(offsets)->getType()->isVectorTy());
1726 ASSERT(V(mask)->getType()->isVectorTy());
1727
1728 auto numEls = V(mask)->getType()->getVectorNumElements();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001729 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
1730 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
1731 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton0fc611f2019-04-18 11:23:27 -04001732 auto i8PtrTy = i8Ty->getPointerTo();
1733 auto elPtrTy = T(elTy)->getPointerTo();
1734 auto elVecTy = ::llvm::VectorType::get(T(elTy), numEls);
1735 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001736 auto i8Base = jit->builder->CreatePointerCast(V(base), i8PtrTy);
1737 auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets));
1738 auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1739 auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
Ben Clayton0b00b952019-07-03 15:51:19 +01001740 auto passthrough = zeroMaskedLanes ? ::llvm::Constant::getNullValue(elVecTy) : llvm::UndefValue::get(elVecTy);
Ben Clayton0fc611f2019-04-18 11:23:27 -04001741 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001742 auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_gather, { elVecTy, elPtrVecTy } );
1743 return V(jit->builder->CreateCall(func, { elPtrs, align, i8Mask, passthrough }));
Ben Clayton0fc611f2019-04-18 11:23:27 -04001744 }
1745
1746 void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment)
1747 {
1748 ASSERT(V(base)->getType()->isPointerTy());
1749 ASSERT(V(val)->getType()->isVectorTy());
1750 ASSERT(V(offsets)->getType()->isVectorTy());
1751 ASSERT(V(mask)->getType()->isVectorTy());
1752
1753 auto numEls = V(mask)->getType()->getVectorNumElements();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001754 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
1755 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
1756 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton0fc611f2019-04-18 11:23:27 -04001757 auto i8PtrTy = i8Ty->getPointerTo();
1758 auto elVecTy = V(val)->getType();
1759 auto elTy = elVecTy->getVectorElementType();
1760 auto elPtrTy = elTy->getPointerTo();
1761 auto elPtrVecTy = ::llvm::VectorType::get(elPtrTy, numEls);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001762 auto i8Base = jit->builder->CreatePointerCast(V(base), i8PtrTy);
1763 auto i8Ptrs = jit->builder->CreateGEP(i8Base, V(offsets));
1764 auto elPtrs = jit->builder->CreatePointerCast(i8Ptrs, elPtrVecTy);
1765 auto i8Mask = jit->builder->CreateIntCast(V(mask), ::llvm::VectorType::get(i1Ty, numEls), false); // vec<int, int, ...> -> vec<bool, bool, ...>
Ben Clayton0fc611f2019-04-18 11:23:27 -04001766 auto align = ::llvm::ConstantInt::get(i32Ty, alignment);
Ben Clayton6f8e5652019-06-29 01:58:02 +01001767 auto func = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::masked_scatter, { elVecTy, elPtrVecTy } );
1768 jit->builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
Ben Clayton0fc611f2019-04-18 11:23:27 -04001769 }
1770
Ben Claytonb16c5862019-05-08 14:01:38 +01001771 void Nucleus::createFence(std::memory_order memoryOrder)
1772 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001773 jit->builder->CreateFence(atomicOrdering(true, memoryOrder));
Ben Claytonb16c5862019-05-08 14:01:38 +01001774 }
1775
Nicolas Capensd294def2017-01-26 17:44:37 -08001776 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001777 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001778 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04001779 ASSERT(V(ptr)->getType()->getContainedType(0) == T(type));
Nicolas Capens01a97962017-07-28 17:30:51 -04001780 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001781 {
Ben Claytonb1243732019-02-27 23:56:18 +00001782 // LLVM manual: "When indexing into an array, pointer or vector,
1783 // integers of any width are allowed, and they are not required to
1784 // be constant. These integers are treated as signed values where
1785 // relevant."
1786 //
1787 // Thus if we want indexes to be treated as unsigned we have to
1788 // zero-extend them ourselves.
1789 //
1790 // Note that this is not because we want to address anywhere near
1791 // 4 GB of data. Instead this is important for performance because
1792 // x86 supports automatic zero-extending of 32-bit registers to
1793 // 64-bit. Thus when indexing into an array using a uint32 is
1794 // actually faster than an int32.
1795 index = unsignedIndex ?
1796 createZExt(index, Long::getType()) :
1797 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001798 }
Ben Claytonb1243732019-02-27 23:56:18 +00001799
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001800 // For non-emulated types we can rely on LLVM's GEP to calculate the
1801 // effective address correctly.
1802 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001803 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01001804 return V(jit->builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001805 }
1806
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001807 // For emulated types we have to multiply the index by the intended
1808 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001809 index = (sizeof(void*) == 8) ?
1810 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1811 createMul(index, createConstantInt((int)typeSize(type)));
1812
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001813 // Cast to a byte pointer, apply the byte offset, and cast back to the
1814 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001815 return createBitCast(
Ben Clayton6f8e5652019-06-29 01:58:02 +01001816 V(jit->builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
Logan Chien191b3052018-08-31 16:57:15 +08001817 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001818 }
1819
Chris Forbes17813932019-04-18 11:45:54 -07001820 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
John Bauman19bac1e2014-05-06 15:23:49 -04001821 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001822 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001823 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001824 }
1825
Chris Forbes707ed992019-04-18 18:17:35 -07001826 Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1827 {
1828 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001829 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Sub, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes707ed992019-04-18 18:17:35 -07001830 }
1831
Chris Forbes17813932019-04-18 11:45:54 -07001832 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1833 {
1834 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001835 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::And, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001836 }
1837
1838 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1839 {
1840 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001841 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Or, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001842 }
1843
1844 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1845 {
1846 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001847 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xor, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001848 }
1849
1850 Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1851 {
1852 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001853 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Min, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001854 }
1855
1856 Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1857 {
1858 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001859 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Max, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbes17813932019-04-18 11:45:54 -07001860 }
1861
Chris Forbesf31bdad2019-05-23 14:58:08 -07001862 Value *Nucleus::createAtomicUMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1863 {
1864 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001865 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMin, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbesf31bdad2019-05-23 14:58:08 -07001866 }
1867
1868 Value *Nucleus::createAtomicUMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1869 {
1870 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001871 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::UMax, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
Chris Forbesf31bdad2019-05-23 14:58:08 -07001872 }
1873
1874
Chris Forbes17813932019-04-18 11:45:54 -07001875 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1876 {
1877 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001878 return V(jit->builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
John Bauman19bac1e2014-05-06 15:23:49 -04001879 }
1880
Chris Forbesa16238d2019-04-18 16:31:54 -07001881 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1882 {
1883 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbesc9ca99e2019-04-19 07:53:34 -07001884 // Note: AtomicCmpXchgInstruction returns a 2-member struct containing {result, success-flag}, not the result directly.
Ben Clayton6f8e5652019-06-29 01:58:02 +01001885 return V(jit->builder->CreateExtractValue(
1886 jit->builder->CreateAtomicCmpXchg(V(ptr), V(compare), V(value), atomicOrdering(true, memoryOrderEqual), atomicOrdering(true, memoryOrderUnequal)),
Chris Forbesa16238d2019-04-18 16:31:54 -07001887 llvm::ArrayRef<unsigned>(0u)));
1888 }
1889
Nicolas Capens19336542016-09-26 10:32:29 -04001890 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001891 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001892 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001893 return V(jit->builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001894 }
1895
Nicolas Capens19336542016-09-26 10:32:29 -04001896 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001897 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001898 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001899 return V(jit->builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001900 }
1901
Nicolas Capens19336542016-09-26 10:32:29 -04001902 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001903 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001904 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001905 return V(jit->builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001906 }
1907
Nicolas Capens19336542016-09-26 10:32:29 -04001908 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001909 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001910 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001911 return V(jit->builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001912 }
1913
Nicolas Capens19336542016-09-26 10:32:29 -04001914 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001915 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001916 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001917 return V(jit->builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001918 }
1919
Nicolas Capens19336542016-09-26 10:32:29 -04001920 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001921 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001922 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001923 return V(jit->builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001924 }
1925
Nicolas Capens19336542016-09-26 10:32:29 -04001926 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001927 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001928 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001929 return V(jit->builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001930 }
1931
Nicolas Capens19336542016-09-26 10:32:29 -04001932 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001933 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001934 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04001935 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1936 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1937 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001938 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001939 {
1940 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001941 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1942 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001943 return createLoad(readAddress, destType);
1944 }
Logan Chien191b3052018-08-31 16:57:15 +08001945 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001946 {
Logan Chien191b3052018-08-31 16:57:15 +08001947 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1948 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001949 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1950 return createLoad(readAddress, destType);
1951 }
1952
Ben Clayton6f8e5652019-06-29 01:58:02 +01001953 return V(jit->builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001954 }
1955
Ben Clayton204a4102019-07-31 13:17:47 +01001956 Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1957 {
1958 RR_DEBUG_INFO_UPDATE_LOC();
1959 return V(jit->builder->CreateICmpEQ(V(lhs), V(rhs)));
1960 }
1961
John Bauman89401822014-05-06 15:04:28 -04001962 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1963 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001964 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001965 return V(jit->builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001966 }
1967
1968 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1969 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001970 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001971 return V(jit->builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001972 }
1973
1974 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1975 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001976 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001977 return V(jit->builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001978 }
1979
1980 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1981 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001982 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001983 return V(jit->builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001984 }
1985
1986 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1987 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001988 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001989 return V(jit->builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001990 }
1991
1992 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1993 {
Ben Claytonac07ed82019-03-26 14:17:41 +00001994 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01001995 return V(jit->builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001996 }
1997
1998 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1999 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002000 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002001 return V(jit->builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002002 }
2003
2004 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
2005 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002006 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002007 return V(jit->builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002008 }
2009
2010 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
2011 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002012 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002013 return V(jit->builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002014 }
2015
2016 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
2017 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002018 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002019 return V(jit->builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002020 }
2021
2022 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
2023 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002024 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002025 return V(jit->builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002026 }
2027
2028 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
2029 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002030 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002031 return V(jit->builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002032 }
2033
2034 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
2035 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002036 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002037 return V(jit->builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002038 }
2039
2040 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
2041 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002042 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002043 return V(jit->builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002044 }
2045
2046 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
2047 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002048 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002049 return V(jit->builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002050 }
2051
2052 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
2053 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002054 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002055 return V(jit->builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002056 }
2057
2058 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
2059 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002060 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002061 return V(jit->builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002062 }
2063
2064 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
2065 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002066 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002067 return V(jit->builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002068 }
2069
2070 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
2071 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002072 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002073 return V(jit->builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002074 }
2075
2076 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
2077 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002078 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002079 return V(jit->builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002080 }
2081
2082 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
2083 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002084 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002085 return V(jit->builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002086 }
2087
2088 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
2089 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002090 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002091 return V(jit->builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002092 }
2093
2094 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
2095 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002096 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002097 return V(jit->builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002098 }
2099
2100 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
2101 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002102 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002103 return V(jit->builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04002104 }
2105
Nicolas Capense95d5342016-09-30 11:37:28 -04002106 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04002107 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002108 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytoneb50d252019-04-15 13:50:01 -04002109 ASSERT(V(vector)->getType()->getContainedType(0) == T(type));
Ben Clayton6f8e5652019-06-29 01:58:02 +01002110 return V(jit->builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04002111 }
2112
2113 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
2114 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002115 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002116 return V(jit->builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04002117 }
2118
Logan Chien191b3052018-08-31 16:57:15 +08002119 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04002120 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002121 RR_DEBUG_INFO_UPDATE_LOC();
2122
Logan Chien191b3052018-08-31 16:57:15 +08002123 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04002124 const int maxSize = 16;
2125 llvm::Constant *swizzle[maxSize];
Ben Claytoneb50d252019-04-15 13:50:01 -04002126 ASSERT(size <= maxSize);
Nicolas Capense89cd582016-09-30 14:23:47 -04002127
2128 for(int i = 0; i < size; i++)
2129 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002130 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(jit->context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04002131 }
2132
2133 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
2134
Ben Clayton6f8e5652019-06-29 01:58:02 +01002135 return V(jit->builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04002136 }
2137
Logan Chien191b3052018-08-31 16:57:15 +08002138 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04002139 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002140 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002141 return V(jit->builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04002142 }
2143
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05002144 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04002145 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002146 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002147 return reinterpret_cast<SwitchCases*>(jit->builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04002148 }
2149
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05002150 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04002151 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002152 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002153 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
Ben Clayton6f8e5652019-06-29 01:58:02 +01002154 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(jit->context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04002155 }
2156
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04002157 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04002158 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002159 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002160 jit->builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04002161 }
2162
Nicolas Capensac230122016-09-20 14:30:06 -04002163 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04002164 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002165 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04002166 }
2167
Nicolas Capens13ac2322016-10-13 14:52:12 -04002168 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04002169 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002170 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002171 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04002172 }
2173
Nicolas Capens13ac2322016-10-13 14:52:12 -04002174 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04002175 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002176 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002177 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(jit->context), i, true));
John Bauman89401822014-05-06 15:04:28 -04002178 }
2179
Nicolas Capens13ac2322016-10-13 14:52:12 -04002180 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04002181 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002182 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002183 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(jit->context), i, true));
John Bauman89401822014-05-06 15:04:28 -04002184 }
2185
Nicolas Capens13ac2322016-10-13 14:52:12 -04002186 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04002187 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002188 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002189 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(jit->context), i, false));
John Bauman89401822014-05-06 15:04:28 -04002190 }
2191
Nicolas Capens13ac2322016-10-13 14:52:12 -04002192 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04002193 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002194 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002195 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(jit->context), b));
John Bauman89401822014-05-06 15:04:28 -04002196 }
2197
Nicolas Capens13ac2322016-10-13 14:52:12 -04002198 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04002199 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002200 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002201 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(jit->context), i, true));
John Bauman89401822014-05-06 15:04:28 -04002202 }
2203
Nicolas Capens13ac2322016-10-13 14:52:12 -04002204 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04002205 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002206 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002207 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(jit->context), i, false));
John Bauman89401822014-05-06 15:04:28 -04002208 }
2209
Nicolas Capens13ac2322016-10-13 14:52:12 -04002210 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04002211 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002212 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002213 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(jit->context), i, true));
John Bauman89401822014-05-06 15:04:28 -04002214 }
2215
Nicolas Capens13ac2322016-10-13 14:52:12 -04002216 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04002217 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002218 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01002219 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(jit->context), i, false));
John Bauman89401822014-05-06 15:04:28 -04002220 }
2221
Nicolas Capens13ac2322016-10-13 14:52:12 -04002222 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04002223 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002224 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002225 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04002226 }
2227
Nicolas Capens13ac2322016-10-13 14:52:12 -04002228 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04002229 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002230 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002231 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04002232 }
2233
Nicolas Capens13ac2322016-10-13 14:52:12 -04002234 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04002235 {
Ben Claytoneb50d252019-04-15 13:50:01 -04002236 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04002237 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
2238 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04002239 ASSERT(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04002240 llvm::Constant *constantVector[16];
2241
Nicolas Capens69674fb2017-09-01 11:08:44 -04002242 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04002243 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04002244 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04002245 }
2246
Nicolas Capens69674fb2017-09-01 11:08:44 -04002247 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04002248 }
2249
2250 Value *Nucleus::createConstantVector(const double *constants, Type *type)
2251 {
Ben Claytoneb50d252019-04-15 13:50:01 -04002252 ASSERT(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04002253 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
2254 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
Ben Claytoneb50d252019-04-15 13:50:01 -04002255 ASSERT(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04002256 llvm::Constant *constantVector[8];
2257
Nicolas Capens69674fb2017-09-01 11:08:44 -04002258 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04002259 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04002260 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04002261 }
2262
Nicolas Capens69674fb2017-09-01 11:08:44 -04002263 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04002264 }
2265
John Bauman19bac1e2014-05-06 15:23:49 -04002266 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04002267 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002268 return T(llvm::Type::getVoidTy(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002269 }
2270
John Bauman19bac1e2014-05-06 15:23:49 -04002271 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04002272 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002273 return T(llvm::Type::getInt1Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002274 }
2275
John Bauman19bac1e2014-05-06 15:23:49 -04002276 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04002277 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002278 return T(llvm::Type::getInt8Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002279 }
2280
John Bauman19bac1e2014-05-06 15:23:49 -04002281 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04002282 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002283 return T(llvm::Type::getInt8Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002284 }
2285
John Bauman19bac1e2014-05-06 15:23:49 -04002286 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04002287 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002288 return T(llvm::Type::getInt16Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002289 }
2290
John Bauman19bac1e2014-05-06 15:23:49 -04002291 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04002292 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002293 return T(llvm::Type::getInt16Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002294 }
2295
John Bauman19bac1e2014-05-06 15:23:49 -04002296 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04002297 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002298 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04002299 }
2300
John Bauman19bac1e2014-05-06 15:23:49 -04002301 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04002302 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002303 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04002304 }
2305
John Bauman19bac1e2014-05-06 15:23:49 -04002306 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002307 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002308 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002309#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002310 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002311#else
2312 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2313#endif
John Bauman89401822014-05-06 15:04:28 -04002314 }
John Bauman66b8ab22014-05-06 15:57:45 -04002315
John Bauman19bac1e2014-05-06 15:23:49 -04002316 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002317 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002318 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002319#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002320 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002321#else
2322 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2323#endif
John Bauman89401822014-05-06 15:04:28 -04002324 }
2325
John Bauman19bac1e2014-05-06 15:23:49 -04002326 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04002327 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002328 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002329#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002330 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08002331#else
2332 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2333#endif
John Bauman89401822014-05-06 15:04:28 -04002334 }
2335
John Bauman19bac1e2014-05-06 15:23:49 -04002336// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002337// {
Logan Chiene3191012018-08-24 22:01:50 +08002338//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002339// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08002340//#else
2341// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2342//#endif
John Bauman89401822014-05-06 15:04:28 -04002343// }
John Bauman66b8ab22014-05-06 15:57:45 -04002344
John Bauman19bac1e2014-05-06 15:23:49 -04002345 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04002346 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002347 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002348#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002349 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002350#else
2351 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2352#endif
John Bauman89401822014-05-06 15:04:28 -04002353 }
2354
John Bauman19bac1e2014-05-06 15:23:49 -04002355 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002356 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002357 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002358 }
2359
John Bauman19bac1e2014-05-06 15:23:49 -04002360 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002361 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002362 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002363#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002364 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002365#else
2366 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2367#endif
John Bauman89401822014-05-06 15:04:28 -04002368 }
John Bauman66b8ab22014-05-06 15:57:45 -04002369
John Bauman19bac1e2014-05-06 15:23:49 -04002370 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002371 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002372 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002373#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002374 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002375#else
2376 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2377#endif
John Bauman89401822014-05-06 15:04:28 -04002378 }
2379
John Bauman19bac1e2014-05-06 15:23:49 -04002380 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04002381 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002382 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002383#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002384 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002385#else
2386 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2387#endif
John Bauman89401822014-05-06 15:04:28 -04002388 }
2389
John Bauman19bac1e2014-05-06 15:23:49 -04002390 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002391 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002392 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002393#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002394 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002395#else
2396 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2397#endif
John Bauman89401822014-05-06 15:04:28 -04002398 }
John Bauman66b8ab22014-05-06 15:57:45 -04002399
John Bauman19bac1e2014-05-06 15:23:49 -04002400 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04002401 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002402 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002403#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002404 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08002405#else
2406 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2407#endif
John Bauman89401822014-05-06 15:04:28 -04002408 }
2409
John Bauman19bac1e2014-05-06 15:23:49 -04002410 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04002411 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002412 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04002413 }
2414
John Bauman19bac1e2014-05-06 15:23:49 -04002415 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002416 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002417 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002418 }
2419
John Bauman19bac1e2014-05-06 15:23:49 -04002420 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04002421 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002422 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04002423 }
2424
Nicolas Capens16b5f152016-10-13 13:39:01 -04002425 Type *Short2::getType()
2426 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002427 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002428 }
2429
Nicolas Capens16b5f152016-10-13 13:39:01 -04002430 Type *UShort2::getType()
2431 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002432 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04002433 }
2434
John Bauman19bac1e2014-05-06 15:23:49 -04002435 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04002436 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002437 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens01a97962017-07-28 17:30:51 -04002438 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04002439 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2440
Nicolas Capens01a97962017-07-28 17:30:51 -04002441 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2442 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04002443
John Bauman66b8ab22014-05-06 15:57:45 -04002444 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04002445 }
2446
John Bauman19bac1e2014-05-06 15:23:49 -04002447// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002448// {
2449// }
2450
John Bauman19bac1e2014-05-06 15:23:49 -04002451 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002452 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002453 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002454 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002455#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002456 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08002457#else
2458 Value *v = v4i32.loadValue();
2459 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
2460#endif
John Bauman66b8ab22014-05-06 15:57:45 -04002461
2462 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04002463 }
2464
John Bauman19bac1e2014-05-06 15:23:49 -04002465 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002466 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002467 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002468#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002469 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2470
2471 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002472#else
2473 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
2474#endif
John Bauman89401822014-05-06 15:04:28 -04002475 }
2476
John Bauman19bac1e2014-05-06 15:23:49 -04002477 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002478 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002479 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002480#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002481 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002482#else
2483 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2484#endif
John Bauman89401822014-05-06 15:04:28 -04002485 }
2486
John Bauman19bac1e2014-05-06 15:23:49 -04002487 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002488 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002489 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002490#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002491 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002492#else
2493 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
2494#endif
John Bauman89401822014-05-06 15:04:28 -04002495 }
2496
John Bauman19bac1e2014-05-06 15:23:49 -04002497 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002498 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002499 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002500#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002501 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002502#else
2503 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
2504#endif
John Bauman89401822014-05-06 15:04:28 -04002505 }
2506
John Bauman19bac1e2014-05-06 15:23:49 -04002507 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002508 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002509 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002510#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002511 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002512#else
2513 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
2514#endif
John Bauman89401822014-05-06 15:04:28 -04002515 }
2516
John Bauman19bac1e2014-05-06 15:23:49 -04002517 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002518 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002519 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002520#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002521 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002522#else
2523 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
2524#endif
John Bauman89401822014-05-06 15:04:28 -04002525 }
2526
John Bauman19bac1e2014-05-06 15:23:49 -04002527 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002528 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002529 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002530#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002531 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002532#else
2533 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2534#endif
John Bauman89401822014-05-06 15:04:28 -04002535 }
2536
John Bauman19bac1e2014-05-06 15:23:49 -04002537 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002538 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002539 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002540#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002541 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002542#else
2543 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
2544#endif
John Bauman89401822014-05-06 15:04:28 -04002545 }
2546
Nicolas Capens33438a62017-09-27 11:47:35 -04002547 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002548 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002549 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002550#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002551 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002552#else
2553 auto result = V(lowerPack(V(x.value), V(y.value), true));
2554#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04002555 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04002556 }
2557
Nicolas Capens33438a62017-09-27 11:47:35 -04002558 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2559 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002560 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002561#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002562 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002563#else
2564 auto result = V(lowerPack(V(x.value), V(y.value), false));
2565#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002566 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
2567 }
2568
John Bauman19bac1e2014-05-06 15:23:49 -04002569 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002570 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002571 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002572#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002573 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002574#else
2575 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
2576#endif
John Bauman89401822014-05-06 15:04:28 -04002577 }
2578
John Bauman19bac1e2014-05-06 15:23:49 -04002579 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04002580 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002581 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002582#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002583 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002584#else
2585 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
2586#endif
John Bauman89401822014-05-06 15:04:28 -04002587 }
2588
John Bauman19bac1e2014-05-06 15:23:49 -04002589 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04002590 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002591 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002592 }
2593
John Bauman19bac1e2014-05-06 15:23:49 -04002594 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04002595 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002596 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002597 if(saturate)
2598 {
Logan Chiena8385ed2018-09-26 19:22:54 +08002599#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002600 if(CPUID::supportsSSE4_1())
2601 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002602 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002603 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002604 }
2605 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002606#endif
John Bauman89401822014-05-06 15:04:28 -04002607 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002608 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002609 }
2610 }
2611 else
2612 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002613 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002614 }
2615 }
2616
John Bauman19bac1e2014-05-06 15:23:49 -04002617 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002618 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002619 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002620#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002621 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2622
2623 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002624#else
2625 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2626#endif
John Bauman89401822014-05-06 15:04:28 -04002627 }
2628
John Bauman19bac1e2014-05-06 15:23:49 -04002629 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002630 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002631 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002632#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002633 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2634
2635 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002636#else
2637 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2638#endif
John Bauman89401822014-05-06 15:04:28 -04002639 }
2640
John Bauman19bac1e2014-05-06 15:23:49 -04002641 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002642 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002643 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002644 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002645 }
2646
John Bauman19bac1e2014-05-06 15:23:49 -04002647 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002648 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002649 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04002650 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002651 }
2652
John Bauman19bac1e2014-05-06 15:23:49 -04002653 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002654 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002655 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002656#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002657 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002658#else
2659 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2660#endif
John Bauman89401822014-05-06 15:04:28 -04002661 }
2662
John Bauman19bac1e2014-05-06 15:23:49 -04002663 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002664 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002665 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002666#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002667 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002668#else
2669 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2670#endif
John Bauman89401822014-05-06 15:04:28 -04002671 }
2672
John Bauman19bac1e2014-05-06 15:23:49 -04002673 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002674 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002675 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002676#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002677 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002678#else
2679 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2680#endif
John Bauman89401822014-05-06 15:04:28 -04002681 }
2682
John Bauman19bac1e2014-05-06 15:23:49 -04002683 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002684 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002685 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002686#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002687 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002688#else
2689 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2690#endif
John Bauman89401822014-05-06 15:04:28 -04002691 }
2692
John Bauman19bac1e2014-05-06 15:23:49 -04002693 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002694 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002695 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002696 }
2697
John Bauman19bac1e2014-05-06 15:23:49 -04002698 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002699 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002700 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002701#if defined(__i386__) || defined(__x86_64__)
2702 return x86::psllw(lhs, rhs);
2703#else
2704 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2705#endif
John Bauman89401822014-05-06 15:04:28 -04002706 }
2707
John Bauman19bac1e2014-05-06 15:23:49 -04002708 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002709 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002710 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002711#if defined(__i386__) || defined(__x86_64__)
2712 return x86::psraw(lhs, rhs);
2713#else
2714 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2715#endif
John Bauman89401822014-05-06 15:04:28 -04002716 }
2717
John Bauman19bac1e2014-05-06 15:23:49 -04002718 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002719 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002720 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002721#if defined(__i386__) || defined(__x86_64__)
2722 return x86::pmaddwd(x, y);
2723#else
2724 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2725#endif
John Bauman89401822014-05-06 15:04:28 -04002726 }
2727
John Bauman19bac1e2014-05-06 15:23:49 -04002728 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002729 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002730 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002731#if defined(__i386__) || defined(__x86_64__)
2732 return x86::pmulhw(x, y);
2733#else
2734 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2735#endif
John Bauman89401822014-05-06 15:04:28 -04002736 }
2737
John Bauman19bac1e2014-05-06 15:23:49 -04002738 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002739 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002740 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002741 }
2742
John Bauman19bac1e2014-05-06 15:23:49 -04002743 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002744 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002745 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002746#if defined(__i386__) || defined(__x86_64__)
2747 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2748#else
2749 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2750#endif
John Bauman89401822014-05-06 15:04:28 -04002751 }
2752
John Bauman19bac1e2014-05-06 15:23:49 -04002753 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002754 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002755 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002756#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002757 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002758#else
2759 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2760#endif
John Bauman89401822014-05-06 15:04:28 -04002761 }
2762
John Bauman19bac1e2014-05-06 15:23:49 -04002763 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002764 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002765 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense89cd582016-09-30 14:23:47 -04002766 int pshufb[16] =
2767 {
2768 select0 + 0,
2769 select0 + 1,
2770 select1 + 0,
2771 select1 + 1,
2772 select2 + 0,
2773 select2 + 1,
2774 select3 + 0,
2775 select3 + 1,
2776 select4 + 0,
2777 select4 + 1,
2778 select5 + 0,
2779 select5 + 1,
2780 select6 + 0,
2781 select6 + 1,
2782 select7 + 0,
2783 select7 + 1,
2784 };
John Bauman89401822014-05-06 15:04:28 -04002785
2786 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002787 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002788 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2789
2790 return RValue<UShort8>(short8);
2791 }
2792
John Bauman19bac1e2014-05-06 15:23:49 -04002793 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002794 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002795 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002796#if defined(__i386__) || defined(__x86_64__)
2797 return x86::pmulhuw(x, y);
2798#else
2799 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2800#endif
John Bauman89401822014-05-06 15:04:28 -04002801 }
2802
John Bauman19bac1e2014-05-06 15:23:49 -04002803 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002804 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002805 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002806 }
2807
Nicolas Capens96d4e092016-11-18 14:22:38 -05002808 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002809 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002810 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002811 RValue<Int> res = val;
2812
Logan Chien191b3052018-08-31 16:57:15 +08002813 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002814 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002815
2816 return res;
2817 }
2818
Nicolas Capens96d4e092016-11-18 14:22:38 -05002819 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002820 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002821 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002822 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002823 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002824
2825 return val;
2826 }
2827
Nicolas Capens96d4e092016-11-18 14:22:38 -05002828 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002829 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002830 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002831 RValue<Int> res = val;
2832
Logan Chien191b3052018-08-31 16:57:15 +08002833 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002834 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002835
2836 return res;
2837 }
2838
Nicolas Capens96d4e092016-11-18 14:22:38 -05002839 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002840 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002841 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002842 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002843 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002844
2845 return val;
2846 }
2847
John Bauman19bac1e2014-05-06 15:23:49 -04002848 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002849 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002850 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002851#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002852 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002853#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002854 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002855#endif
John Bauman89401822014-05-06 15:04:28 -04002856 }
2857
John Bauman19bac1e2014-05-06 15:23:49 -04002858 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002859 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002860 return T(llvm::Type::getInt32Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002861 }
2862
John Bauman19bac1e2014-05-06 15:23:49 -04002863 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002864 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002865 return T(llvm::Type::getInt64Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002866 }
2867
John Bauman19bac1e2014-05-06 15:23:49 -04002868 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002869 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002870 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04002871 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2872 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002873
Alexis Hetu764d1422016-09-28 08:44:22 -04002874 // Smallest positive value representable in UInt, but not in Int
2875 const unsigned int ustart = 0x80000000u;
2876 const float ustartf = float(ustart);
2877
2878 // If the value is negative, store 0, otherwise store the result of the conversion
2879 storeValue((~(As<Int>(cast) >> 31) &
2880 // Check if the value can be represented as an Int
2881 IfThenElse(cast >= ustartf,
2882 // If the value is too large, subtract ustart and re-add it after conversion.
2883 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2884 // Otherwise, just convert normally
2885 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002886 }
2887
Nicolas Capens96d4e092016-11-18 14:22:38 -05002888 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002889 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002890 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002891 RValue<UInt> res = val;
2892
Logan Chien191b3052018-08-31 16:57:15 +08002893 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002894 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002895
2896 return res;
2897 }
2898
Nicolas Capens96d4e092016-11-18 14:22:38 -05002899 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002900 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002901 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002902 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002903 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002904
2905 return val;
2906 }
2907
Nicolas Capens96d4e092016-11-18 14:22:38 -05002908 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002909 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002910 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04002911 RValue<UInt> res = val;
2912
Logan Chien191b3052018-08-31 16:57:15 +08002913 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002914 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002915
2916 return res;
2917 }
2918
Nicolas Capens96d4e092016-11-18 14:22:38 -05002919 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002920 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002921 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chien191b3052018-08-31 16:57:15 +08002922 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002923 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002924
2925 return val;
2926 }
2927
John Bauman19bac1e2014-05-06 15:23:49 -04002928// RValue<UInt> RoundUInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002929// {
Logan Chiene3191012018-08-24 22:01:50 +08002930//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002931// return x86::cvtss2si(val); // FIXME: Unsigned
Logan Chiene3191012018-08-24 22:01:50 +08002932//#else
2933// return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
2934//#endif
John Bauman89401822014-05-06 15:04:28 -04002935// }
2936
John Bauman19bac1e2014-05-06 15:23:49 -04002937 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002938 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01002939 return T(llvm::Type::getInt32Ty(jit->context));
John Bauman89401822014-05-06 15:04:28 -04002940 }
2941
John Bauman19bac1e2014-05-06 15:23:49 -04002942// Int2::Int2(RValue<Int> cast)
2943// {
John Bauman19bac1e2014-05-06 15:23:49 -04002944// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2945// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
John Bauman66b8ab22014-05-06 15:57:45 -04002946//
Nicolas Capense89cd582016-09-30 14:23:47 -04002947// int shuffle[2] = {0, 0};
2948// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
John Bauman19bac1e2014-05-06 15:23:49 -04002949//
John Bauman66b8ab22014-05-06 15:57:45 -04002950// storeValue(replicate);
John Bauman19bac1e2014-05-06 15:23:49 -04002951// }
John Bauman89401822014-05-06 15:04:28 -04002952
John Bauman19bac1e2014-05-06 15:23:49 -04002953 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002954 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002955 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002956#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002957 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2958
2959 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002960#else
2961 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2962#endif
John Bauman89401822014-05-06 15:04:28 -04002963 }
2964
John Bauman19bac1e2014-05-06 15:23:49 -04002965 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002966 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002967 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002968#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002969 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2970
2971 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002972#else
2973 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2974#endif
John Bauman89401822014-05-06 15:04:28 -04002975 }
2976
John Bauman19bac1e2014-05-06 15:23:49 -04002977 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002978 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002979 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002980 }
2981
John Bauman19bac1e2014-05-06 15:23:49 -04002982 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002983 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002984 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002985#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002986 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2987
2988 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002989#else
2990 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2991#endif
John Bauman89401822014-05-06 15:04:28 -04002992 }
2993
John Bauman19bac1e2014-05-06 15:23:49 -04002994 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002995 {
Ben Claytonac07ed82019-03-26 14:17:41 +00002996 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08002997#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002998 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2999
3000 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08003001#else
3002 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
3003#endif
John Bauman89401822014-05-06 15:04:28 -04003004 }
3005
John Bauman19bac1e2014-05-06 15:23:49 -04003006 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04003007 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003008 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04003009 }
3010
Nicolas Capenscb986762017-01-20 11:34:37 -05003011 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003012 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003013 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003014#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04003015 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003016 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003017 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003018 }
3019 else
Logan Chiene3191012018-08-24 22:01:50 +08003020#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003021 {
Nicolas Capense89cd582016-09-30 14:23:47 -04003022 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04003023 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08003024 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003025
Nicolas Capense89cd582016-09-30 14:23:47 -04003026 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04003027 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08003028 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003029
Nicolas Capens01a97962017-07-28 17:30:51 -04003030 *this = As<Int4>(d);
3031 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003032 }
3033
Nicolas Capenscb986762017-01-20 11:34:37 -05003034 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003035 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003036 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003037#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04003038 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003039 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003040 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003041 }
3042 else
Logan Chiene3191012018-08-24 22:01:50 +08003043#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003044 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003045 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
3046 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
3047 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003048
Nicolas Capense89cd582016-09-30 14:23:47 -04003049 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04003050 Value *c = Nucleus::createBitCast(b, Short8::getType());
3051 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003052
Nicolas Capens01a97962017-07-28 17:30:51 -04003053 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003054 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04003055 }
3056
Nicolas Capenscb986762017-01-20 11:34:37 -05003057 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003058 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003059 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003060#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003061 if(CPUID::supportsSSE4_1())
3062 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003063 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003064 }
3065 else
Logan Chiene3191012018-08-24 22:01:50 +08003066#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003067 {
Nicolas Capense89cd582016-09-30 14:23:47 -04003068 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04003069 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3070 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003071 }
3072 }
3073
Nicolas Capenscb986762017-01-20 11:34:37 -05003074 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003075 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003076 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003077#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003078 if(CPUID::supportsSSE4_1())
3079 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003080 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003081 }
3082 else
Logan Chiene3191012018-08-24 22:01:50 +08003083#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003084 {
Nicolas Capense89cd582016-09-30 14:23:47 -04003085 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04003086 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
3087 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04003088 }
3089 }
3090
Nicolas Capenscb986762017-01-20 11:34:37 -05003091 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04003092 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003093 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens24c8cf02016-08-15 15:33:14 -04003094 Value *vector = loadValue();
3095 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3096
Nicolas Capense89cd582016-09-30 14:23:47 -04003097 int swizzle[4] = {0, 0, 0, 0};
3098 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04003099
3100 storeValue(replicate);
3101 }
3102
John Bauman19bac1e2014-05-06 15:23:49 -04003103 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003104 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003105 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003106#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003107 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08003108#else
3109 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
3110#endif
John Bauman89401822014-05-06 15:04:28 -04003111 }
3112
John Bauman19bac1e2014-05-06 15:23:49 -04003113 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003114 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003115 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003116#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003117 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08003118#else
3119 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
3120#endif
John Bauman89401822014-05-06 15:04:28 -04003121 }
3122
John Bauman19bac1e2014-05-06 15:23:49 -04003123 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3124 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003125 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003126 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003127 }
3128
3129 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3130 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003131 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003132 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003133 }
3134
3135 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3136 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003137 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003138 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003139 }
3140
3141 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3142 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003143 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003144 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003145 }
3146
3147 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3148 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003149 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003150 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003151 }
3152
3153 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3154 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003155 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003156 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003157 }
3158
3159 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3160 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003161 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003162#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003163 if(CPUID::supportsSSE4_1())
3164 {
3165 return x86::pmaxsd(x, y);
3166 }
3167 else
Logan Chiene3191012018-08-24 22:01:50 +08003168#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003169 {
3170 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003171 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04003172 }
3173 }
3174
3175 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3176 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003177 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003178#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003179 if(CPUID::supportsSSE4_1())
3180 {
3181 return x86::pminsd(x, y);
3182 }
3183 else
Logan Chiene3191012018-08-24 22:01:50 +08003184#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003185 {
3186 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003187 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04003188 }
3189 }
3190
3191 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04003192 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003193 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003194#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003195 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08003196#else
Logan Chien2faa24a2018-09-26 19:59:32 +08003197 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08003198#endif
John Bauman89401822014-05-06 15:04:28 -04003199 }
3200
Chris Forbese86b6dc2019-03-01 09:08:47 -08003201 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
3202 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003203 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08003204 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3205 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
3206 }
3207
3208 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3209 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003210 RR_DEBUG_INFO_UPDATE_LOC();
Chris Forbese86b6dc2019-03-01 09:08:47 -08003211 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3212 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
3213 }
3214
Nicolas Capens33438a62017-09-27 11:47:35 -04003215 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003216 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003217 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003218#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003219 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003220#else
3221 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
3222#endif
John Bauman89401822014-05-06 15:04:28 -04003223 }
3224
Nicolas Capens33438a62017-09-27 11:47:35 -04003225 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3226 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003227 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003228#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04003229 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003230#else
3231 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
3232#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04003233 }
3234
John Bauman19bac1e2014-05-06 15:23:49 -04003235 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003236 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003237 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003238#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003239 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08003240#else
3241 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
3242#endif
John Bauman89401822014-05-06 15:04:28 -04003243 }
3244
John Bauman19bac1e2014-05-06 15:23:49 -04003245 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04003246 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003247 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003248 }
3249
Nicolas Capenscb986762017-01-20 11:34:37 -05003250 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04003251 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003252 RR_DEBUG_INFO_UPDATE_LOC();
Alexis Hetu764d1422016-09-28 08:44:22 -04003253 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
3254 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04003255
Alexis Hetu764d1422016-09-28 08:44:22 -04003256 // Smallest positive value representable in UInt, but not in Int
3257 const unsigned int ustart = 0x80000000u;
3258 const float ustartf = float(ustart);
3259
3260 // Check if the value can be represented as an Int
3261 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3262 // If the value is too large, subtract ustart and re-add it after conversion.
3263 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
3264 // Otherwise, just convert normally
3265 (~uiValue & Int4(cast));
3266 // If the value is negative, store 0, otherwise store the result of the conversion
3267 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04003268 }
3269
Ben Clayton88816fa2019-05-15 17:08:14 +01003270 UInt4::UInt4(RValue<UInt> rhs) : XYZW(this)
3271 {
3272 RR_DEBUG_INFO_UPDATE_LOC();
3273 Value *vector = loadValue();
3274 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3275
3276 int swizzle[4] = {0, 0, 0, 0};
3277 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
3278
3279 storeValue(replicate);
3280 }
3281
John Bauman19bac1e2014-05-06 15:23:49 -04003282 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003283 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003284 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003285#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003286 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08003287#else
3288 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
3289#endif
John Bauman89401822014-05-06 15:04:28 -04003290 }
3291
John Bauman19bac1e2014-05-06 15:23:49 -04003292 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04003293 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003294 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003295#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003296 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08003297#else
3298 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
3299#endif
John Bauman89401822014-05-06 15:04:28 -04003300 }
3301
John Bauman19bac1e2014-05-06 15:23:49 -04003302 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3303 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003304 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003305 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003306 }
3307
3308 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3309 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003310 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003311 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
3312 }
3313
3314 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3315 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003316 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003317 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003318 }
3319
3320 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3321 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003322 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003323 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
3324 }
3325
3326 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3327 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003328 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytona7edc1c2019-06-20 12:17:03 +01003329 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
John Bauman19bac1e2014-05-06 15:23:49 -04003330 }
3331
3332 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3333 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003334 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman19bac1e2014-05-06 15:23:49 -04003335 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
3336 }
3337
3338 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3339 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003340 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003341#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003342 if(CPUID::supportsSSE4_1())
3343 {
3344 return x86::pmaxud(x, y);
3345 }
3346 else
Logan Chiene3191012018-08-24 22:01:50 +08003347#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003348 {
3349 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003350 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04003351 }
3352 }
3353
3354 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3355 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003356 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003357#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003358 if(CPUID::supportsSSE4_1())
3359 {
3360 return x86::pminud(x, y);
3361 }
3362 else
Logan Chiene3191012018-08-24 22:01:50 +08003363#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003364 {
3365 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07003366 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04003367 }
3368 }
3369
John Bauman19bac1e2014-05-06 15:23:49 -04003370 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04003371 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003372 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003373 }
3374
Alexis Hetu734e2572018-12-20 14:00:49 -05003375 Type *Half::getType()
3376 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003377 return T(llvm::Type::getInt16Ty(jit->context));
Alexis Hetu734e2572018-12-20 14:00:49 -05003378 }
3379
Nicolas Capens05b3d662016-02-25 23:58:33 -05003380 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003381 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003382 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003383#if defined(__i386__) || defined(__x86_64__)
3384 if(exactAtPow2)
3385 {
3386 // rcpss uses a piecewise-linear approximation which minimizes the relative error
3387 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3388 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3389 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003390 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003391#else
3392 return As<Float>(V(lowerRCP(V(x.value))));
3393#endif
John Bauman89401822014-05-06 15:04:28 -04003394 }
John Bauman66b8ab22014-05-06 15:57:45 -04003395
John Bauman19bac1e2014-05-06 15:23:49 -04003396 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003397 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003398 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003399#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003400 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003401#else
3402 return As<Float>(V(lowerRSQRT(V(x.value))));
3403#endif
John Bauman89401822014-05-06 15:04:28 -04003404 }
3405
John Bauman19bac1e2014-05-06 15:23:49 -04003406 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003407 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003408 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003409#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003410 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08003411#else
3412 return As<Float>(V(lowerSQRT(V(x.value))));
3413#endif
John Bauman89401822014-05-06 15:04:28 -04003414 }
3415
John Bauman19bac1e2014-05-06 15:23:49 -04003416 RValue<Float> Round(RValue<Float> x)
3417 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003418 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003419#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003420 if(CPUID::supportsSSE4_1())
3421 {
3422 return x86::roundss(x, 0);
3423 }
3424 else
3425 {
3426 return Float4(Round(Float4(x))).x;
3427 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003428#else
3429 return RValue<Float>(V(lowerRound(V(x.value))));
3430#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003431 }
3432
3433 RValue<Float> Trunc(RValue<Float> x)
3434 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003435 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003436#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003437 if(CPUID::supportsSSE4_1())
3438 {
3439 return x86::roundss(x, 3);
3440 }
3441 else
3442 {
3443 return Float(Int(x)); // Rounded toward zero
3444 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003445#else
3446 return RValue<Float>(V(lowerTrunc(V(x.value))));
3447#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003448 }
3449
3450 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003451 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003452 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003453#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003454 if(CPUID::supportsSSE4_1())
3455 {
3456 return x - x86::floorss(x);
3457 }
3458 else
3459 {
John Bauman19bac1e2014-05-06 15:23:49 -04003460 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04003461 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003462#else
3463 // x - floor(x) can be 1.0 for very small negative x.
3464 // Clamp against the value just below 1.0.
3465 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
3466#endif
John Bauman89401822014-05-06 15:04:28 -04003467 }
3468
John Bauman19bac1e2014-05-06 15:23:49 -04003469 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003470 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003471 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003472#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003473 if(CPUID::supportsSSE4_1())
3474 {
3475 return x86::floorss(x);
3476 }
3477 else
3478 {
3479 return Float4(Floor(Float4(x))).x;
3480 }
Logan Chien40a60052018-09-26 19:03:53 +08003481#else
3482 return RValue<Float>(V(lowerFloor(V(x.value))));
3483#endif
John Bauman89401822014-05-06 15:04:28 -04003484 }
3485
John Bauman19bac1e2014-05-06 15:23:49 -04003486 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04003487 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003488 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003489#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003490 if(CPUID::supportsSSE4_1())
3491 {
3492 return x86::ceilss(x);
3493 }
3494 else
Logan Chiene3191012018-08-24 22:01:50 +08003495#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003496 {
3497 return Float4(Ceil(Float4(x))).x;
3498 }
John Bauman89401822014-05-06 15:04:28 -04003499 }
3500
John Bauman19bac1e2014-05-06 15:23:49 -04003501 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04003502 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003503 return T(llvm::Type::getFloatTy(jit->context));
John Bauman89401822014-05-06 15:04:28 -04003504 }
3505
John Bauman19bac1e2014-05-06 15:23:49 -04003506 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04003507 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003508 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04003509 }
3510
Nicolas Capenscb986762017-01-20 11:34:37 -05003511 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04003512 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003513 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman66b8ab22014-05-06 15:57:45 -04003514 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04003515 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
3516
Nicolas Capense89cd582016-09-30 14:23:47 -04003517 int swizzle[4] = {0, 0, 0, 0};
3518 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04003519
John Bauman66b8ab22014-05-06 15:57:45 -04003520 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04003521 }
3522
John Bauman19bac1e2014-05-06 15:23:49 -04003523 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003524 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003525 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003526#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003527 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003528#else
3529 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
3530#endif
John Bauman89401822014-05-06 15:04:28 -04003531 }
3532
John Bauman19bac1e2014-05-06 15:23:49 -04003533 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003534 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003535 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003536#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003537 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08003538#else
3539 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
3540#endif
John Bauman89401822014-05-06 15:04:28 -04003541 }
3542
Nicolas Capens05b3d662016-02-25 23:58:33 -05003543 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04003544 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003545 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003546#if defined(__i386__) || defined(__x86_64__)
3547 if(exactAtPow2)
3548 {
3549 // rcpps uses a piecewise-linear approximation which minimizes the relative error
3550 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
3551 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
3552 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003553 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003554#else
3555 return As<Float4>(V(lowerRCP(V(x.value))));
3556#endif
John Bauman89401822014-05-06 15:04:28 -04003557 }
John Bauman66b8ab22014-05-06 15:57:45 -04003558
John Bauman19bac1e2014-05-06 15:23:49 -04003559 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003560 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003561 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003562#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003563 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003564#else
3565 return As<Float4>(V(lowerRSQRT(V(x.value))));
3566#endif
John Bauman89401822014-05-06 15:04:28 -04003567 }
3568
John Bauman19bac1e2014-05-06 15:23:49 -04003569 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003570 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003571 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003572#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003573 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003574#else
3575 return As<Float4>(V(lowerSQRT(V(x.value))));
3576#endif
John Bauman89401822014-05-06 15:04:28 -04003577 }
3578
John Bauman19bac1e2014-05-06 15:23:49 -04003579 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003580 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003581 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003582#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003583 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08003584#else
3585 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
3586#endif
John Bauman89401822014-05-06 15:04:28 -04003587 }
3588
John Bauman19bac1e2014-05-06 15:23:49 -04003589 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003590 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003591 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003592 // return As<Int4>(x86::cmpeqps(x, y));
3593 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
3594 }
3595
John Bauman19bac1e2014-05-06 15:23:49 -04003596 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003597 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003598 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003599 // return As<Int4>(x86::cmpltps(x, y));
3600 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
3601 }
3602
John Bauman19bac1e2014-05-06 15:23:49 -04003603 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003604 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003605 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003606 // return As<Int4>(x86::cmpleps(x, y));
3607 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
3608 }
3609
John Bauman19bac1e2014-05-06 15:23:49 -04003610 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003611 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003612 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003613 // return As<Int4>(x86::cmpneqps(x, y));
3614 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
3615 }
3616
John Bauman19bac1e2014-05-06 15:23:49 -04003617 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003618 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003619 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003620 // return As<Int4>(x86::cmpnltps(x, y));
3621 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
3622 }
3623
John Bauman19bac1e2014-05-06 15:23:49 -04003624 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003625 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003626 RR_DEBUG_INFO_UPDATE_LOC();
John Bauman89401822014-05-06 15:04:28 -04003627 // return As<Int4>(x86::cmpnleps(x, y));
3628 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
3629 }
3630
Ben Claytonec1aeb82019-03-04 19:33:27 +00003631 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3632 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003633 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003634 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
3635 }
3636
3637 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3638 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003639 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003640 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
3641 }
3642
3643 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3644 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003645 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003646 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
3647 }
3648
3649 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3650 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003651 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003652 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
3653 }
3654
3655 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3656 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003657 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003658 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
3659 }
3660
3661 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3662 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003663 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonec1aeb82019-03-04 19:33:27 +00003664 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
3665 }
3666
John Bauman19bac1e2014-05-06 15:23:49 -04003667 RValue<Float4> Round(RValue<Float4> x)
3668 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003669 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003670#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003671 if(CPUID::supportsSSE4_1())
3672 {
3673 return x86::roundps(x, 0);
3674 }
3675 else
3676 {
3677 return Float4(RoundInt(x));
3678 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003679#else
3680 return RValue<Float4>(V(lowerRound(V(x.value))));
3681#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003682 }
3683
3684 RValue<Float4> Trunc(RValue<Float4> x)
3685 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003686 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003687#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003688 if(CPUID::supportsSSE4_1())
3689 {
3690 return x86::roundps(x, 3);
3691 }
3692 else
3693 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003694 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003695 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003696#else
3697 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3698#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003699 }
3700
3701 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003702 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003703 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb9230422017-07-17 10:27:33 -04003704 Float4 frc;
3705
Logan Chien40a60052018-09-26 19:03:53 +08003706#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003707 if(CPUID::supportsSSE4_1())
3708 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003709 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003710 }
3711 else
3712 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003713 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003714
Nicolas Capensb9230422017-07-17 10:27:33 -04003715 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003716 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003717#else
3718 frc = x - Floor(x);
3719#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003720
3721 // x - floor(x) can be 1.0 for very small negative x.
3722 // Clamp against the value just below 1.0.
3723 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003724 }
3725
John Bauman19bac1e2014-05-06 15:23:49 -04003726 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003727 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003728 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003729#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003730 if(CPUID::supportsSSE4_1())
3731 {
3732 return x86::floorps(x);
3733 }
3734 else
3735 {
John Bauman19bac1e2014-05-06 15:23:49 -04003736 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003737 }
Logan Chien40a60052018-09-26 19:03:53 +08003738#else
3739 return RValue<Float4>(V(lowerFloor(V(x.value))));
3740#endif
John Bauman89401822014-05-06 15:04:28 -04003741 }
3742
John Bauman19bac1e2014-05-06 15:23:49 -04003743 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003744 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003745 RR_DEBUG_INFO_UPDATE_LOC();
Logan Chiene3191012018-08-24 22:01:50 +08003746#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003747 if(CPUID::supportsSSE4_1())
3748 {
3749 return x86::ceilps(x);
3750 }
3751 else
Logan Chiene3191012018-08-24 22:01:50 +08003752#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003753 {
3754 return -Floor(-x);
3755 }
John Bauman89401822014-05-06 15:04:28 -04003756 }
3757
Ben Claytona2c8b772019-04-09 13:42:36 -04003758 RValue<Float4> Sin(RValue<Float4> v)
3759 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003760 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::sin, { V(v.value)->getType() } );
3761 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Claytona2c8b772019-04-09 13:42:36 -04003762 }
3763
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003764 RValue<Float4> Cos(RValue<Float4> v)
3765 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003766 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::cos, { V(v.value)->getType() } );
3767 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003768 }
3769
Ben Clayton14740062019-04-09 13:48:41 -04003770 RValue<Float4> Tan(RValue<Float4> v)
3771 {
3772 return Sin(v) / Cos(v);
3773 }
3774
Ben Claytoneafae472019-04-09 14:22:38 -04003775 static RValue<Float4> TransformFloat4PerElement(RValue<Float4> v, const char* name)
Ben Claytonf9350d72019-04-09 14:19:02 -04003776 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003777 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), ::llvm::ArrayRef<llvm::Type*>(T(Float::getType())), false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01003778 auto func = jit->module->getOrInsertFunction(name, funcTy);
Ben Claytonf9350d72019-04-09 14:19:02 -04003779 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3780 for (uint64_t i = 0; i < 4; i++)
3781 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003782 auto el = jit->builder->CreateCall(func, V(Nucleus::createExtractElement(v.value, Float::getType(), i)));
Ben Claytonc38fc122019-04-11 08:58:49 -04003783 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytonf9350d72019-04-09 14:19:02 -04003784 }
3785 return RValue<Float4>(V(out));
3786 }
3787
Ben Claytoneafae472019-04-09 14:22:38 -04003788 RValue<Float4> Asin(RValue<Float4> v)
3789 {
3790 return TransformFloat4PerElement(v, "asinf");
3791 }
3792
3793 RValue<Float4> Acos(RValue<Float4> v)
3794 {
3795 return TransformFloat4PerElement(v, "acosf");
3796 }
3797
Ben Clayton749b4e02019-04-09 14:27:43 -04003798 RValue<Float4> Atan(RValue<Float4> v)
3799 {
3800 return TransformFloat4PerElement(v, "atanf");
3801 }
3802
Ben Claytond9636972019-04-09 15:09:54 -04003803 RValue<Float4> Sinh(RValue<Float4> v)
3804 {
3805 return TransformFloat4PerElement(v, "sinhf");
3806 }
3807
Ben Clayton900ea2c2019-04-09 15:25:36 -04003808 RValue<Float4> Cosh(RValue<Float4> v)
3809 {
3810 return TransformFloat4PerElement(v, "coshf");
3811 }
3812
Ben Clayton3928bd92019-04-09 15:27:41 -04003813 RValue<Float4> Tanh(RValue<Float4> v)
3814 {
3815 return TransformFloat4PerElement(v, "tanhf");
3816 }
3817
Ben Claytonf6d77ab2019-04-09 15:30:04 -04003818 RValue<Float4> Asinh(RValue<Float4> v)
3819 {
3820 return TransformFloat4PerElement(v, "asinhf");
3821 }
3822
Ben Clayton28ebcb02019-04-09 15:33:38 -04003823 RValue<Float4> Acosh(RValue<Float4> v)
3824 {
3825 return TransformFloat4PerElement(v, "acoshf");
3826 }
3827
Ben Claytonfa6a5392019-04-09 15:35:24 -04003828 RValue<Float4> Atanh(RValue<Float4> v)
3829 {
3830 return TransformFloat4PerElement(v, "atanhf");
3831 }
3832
Ben Claytona520c3e2019-04-09 15:43:45 -04003833 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3834 {
Ben Claytonc38fc122019-04-11 08:58:49 -04003835 ::llvm::SmallVector<::llvm::Type*, 2> paramTys;
3836 paramTys.push_back(T(Float::getType()));
3837 paramTys.push_back(T(Float::getType()));
3838 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), paramTys, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01003839 auto func = jit->module->getOrInsertFunction("atan2f", funcTy);
Ben Claytona520c3e2019-04-09 15:43:45 -04003840 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3841 for (uint64_t i = 0; i < 4; i++)
3842 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003843 auto el = jit->builder->CreateCall2(func, ARGS(
Ben Claytonc38fc122019-04-11 08:58:49 -04003844 V(Nucleus::createExtractElement(x.value, Float::getType(), i)),
3845 V(Nucleus::createExtractElement(y.value, Float::getType(), i))
3846 ));
3847 out = V(Nucleus::createInsertElement(V(out), V(el), i));
Ben Claytona520c3e2019-04-09 15:43:45 -04003848 }
3849 return RValue<Float4>(V(out));
3850 }
3851
Ben Claytonbfe94f02019-04-09 15:52:12 -04003852 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3853 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003854 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::pow, { T(Float4::getType()) });
3855 return RValue<Float4>(V(jit->builder->CreateCall2(func, ARGS(V(x.value), V(y.value)))));
Ben Claytonbfe94f02019-04-09 15:52:12 -04003856 }
3857
Ben Clayton242f0022019-04-09 16:00:53 -04003858 RValue<Float4> Exp(RValue<Float4> v)
3859 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003860 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::exp, { T(Float4::getType()) } );
3861 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Clayton242f0022019-04-09 16:00:53 -04003862 }
3863
Ben Clayton2c1da722019-04-09 16:03:03 -04003864 RValue<Float4> Log(RValue<Float4> v)
3865 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003866 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::log, { T(Float4::getType()) } );
3867 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Clayton2c1da722019-04-09 16:03:03 -04003868 }
3869
Ben Claytonf40b56c2019-04-09 16:06:55 -04003870 RValue<Float4> Exp2(RValue<Float4> v)
3871 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003872 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::exp2, { T(Float4::getType()) } );
3873 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Claytonf40b56c2019-04-09 16:06:55 -04003874 }
3875
Ben Claytone17acfe2019-04-09 16:09:13 -04003876 RValue<Float4> Log2(RValue<Float4> v)
3877 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003878 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::log2, { T(Float4::getType()) } );
3879 return RValue<Float4>(V(jit->builder->CreateCall(func, V(v.value))));
Ben Claytone17acfe2019-04-09 16:09:13 -04003880 }
3881
Ben Clayton60958262019-04-10 14:53:30 -04003882 RValue<UInt4> Ctlz(RValue<UInt4> v, bool isZeroUndef)
3883 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003884 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::ctlz, { T(UInt4::getType()) } );
3885 return RValue<UInt4>(V(jit->builder->CreateCall2(func, ARGS(
Ben Clayton60958262019-04-10 14:53:30 -04003886 V(v.value),
Ben Clayton6f8e5652019-06-29 01:58:02 +01003887 isZeroUndef ? ::llvm::ConstantInt::getTrue(jit->context) : ::llvm::ConstantInt::getFalse(jit->context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003888 ))));
Ben Clayton60958262019-04-10 14:53:30 -04003889 }
3890
Ben Clayton3f007c42019-04-10 14:54:23 -04003891 RValue<UInt4> Cttz(RValue<UInt4> v, bool isZeroUndef)
3892 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003893 auto func = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::cttz, { T(UInt4::getType()) } );
3894 return RValue<UInt4>(V(jit->builder->CreateCall2(func, ARGS(
Ben Clayton3f007c42019-04-10 14:54:23 -04003895 V(v.value),
Ben Clayton6f8e5652019-06-29 01:58:02 +01003896 isZeroUndef ? ::llvm::ConstantInt::getTrue(jit->context) : ::llvm::ConstantInt::getFalse(jit->context)
Ben Claytonc38fc122019-04-11 08:58:49 -04003897 ))));
Ben Clayton3f007c42019-04-10 14:54:23 -04003898 }
3899
John Bauman19bac1e2014-05-06 15:23:49 -04003900 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003901 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003902 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003903 }
3904
John Bauman89401822014-05-06 15:04:28 -04003905 RValue<Long> Ticks()
3906 {
Ben Claytonac07ed82019-03-26 14:17:41 +00003907 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton6f8e5652019-06-29 01:58:02 +01003908 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003909
Ben Clayton6f8e5652019-06-29 01:58:02 +01003910 return RValue<Long>(V(jit->builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003911 }
Ben Claytond853c122019-04-16 17:51:49 -04003912
3913 RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3914 {
3915 // Note: this should work for 32-bit pointers as well because 'inttoptr'
3916 // is defined to truncate (and zero extend) if necessary.
Ben Clayton6f8e5652019-06-29 01:58:02 +01003917 auto ptrAsInt = ::llvm::ConstantInt::get(::llvm::Type::getInt64Ty(jit->context), reinterpret_cast<uintptr_t>(ptr));
3918 return RValue<Pointer<Byte>>(V(jit->builder->CreateIntToPtr(ptrAsInt, T(Pointer<Byte>::getType()))));
Ben Claytond853c122019-04-16 17:51:49 -04003919 }
3920
3921 Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3922 {
3923 ::llvm::SmallVector<::llvm::Type*, 8> paramTys;
3924 for (auto ty : argTys) { paramTys.push_back(T(ty)); }
3925 auto funcTy = ::llvm::FunctionType::get(T(retTy), paramTys, false);
3926
3927 auto funcPtrTy = funcTy->getPointerTo();
Ben Clayton6f8e5652019-06-29 01:58:02 +01003928 auto funcPtr = jit->builder->CreatePointerCast(V(fptr.value), funcPtrTy);
Ben Claytond853c122019-04-16 17:51:49 -04003929
3930 ::llvm::SmallVector<::llvm::Value*, 8> arguments;
3931 for (auto arg : args) { arguments.push_back(V(arg)); }
Ben Clayton6f8e5652019-06-29 01:58:02 +01003932 return V(jit->builder->CreateCall(funcPtr, arguments));
Ben Claytond853c122019-04-16 17:51:49 -04003933 }
Nicolas Capens9770a462019-06-25 10:47:10 -04003934
3935 void Breakpoint()
3936 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003937 llvm::Function *debugtrap = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::debugtrap);
Nicolas Capens9770a462019-06-25 10:47:10 -04003938
Ben Clayton6f8e5652019-06-29 01:58:02 +01003939 jit->builder->CreateCall(debugtrap);
Nicolas Capens9770a462019-06-25 10:47:10 -04003940 }
John Bauman89401822014-05-06 15:04:28 -04003941}
3942
Nicolas Capens48461502018-08-06 14:20:45 -04003943namespace rr
John Bauman89401822014-05-06 15:04:28 -04003944{
Logan Chiene3191012018-08-24 22:01:50 +08003945#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003946 namespace x86
3947 {
John Bauman19bac1e2014-05-06 15:23:49 -04003948 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003949 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003950 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003951
John Bauman89401822014-05-06 15:04:28 -04003952 Float4 vector;
3953 vector.x = val;
3954
Ben Clayton6f8e5652019-06-29 01:58:02 +01003955 return RValue<Int>(V(jit->builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003956 }
3957
John Bauman19bac1e2014-05-06 15:23:49 -04003958 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003959 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003960 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003961
Ben Clayton6f8e5652019-06-29 01:58:02 +01003962 return RValue<Int4>(V(jit->builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003963 }
3964
John Bauman19bac1e2014-05-06 15:23:49 -04003965 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003966 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003967 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003968
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003969 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003970
Ben Clayton6f8e5652019-06-29 01:58:02 +01003971 return RValue<Float>(Nucleus::createExtractElement(V(jit->builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003972 }
3973
John Bauman19bac1e2014-05-06 15:23:49 -04003974 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003975 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003976 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3977 return RValue<Float>(V(jit->builder->CreateCall(sqrt, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003978 }
3979
John Bauman19bac1e2014-05-06 15:23:49 -04003980 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003981 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003982 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003983
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003984 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003985
Ben Clayton6f8e5652019-06-29 01:58:02 +01003986 return RValue<Float>(Nucleus::createExtractElement(V(jit->builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003987 }
3988
John Bauman19bac1e2014-05-06 15:23:49 -04003989 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003990 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003991 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003992
Ben Clayton6f8e5652019-06-29 01:58:02 +01003993 return RValue<Float4>(V(jit->builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003994 }
3995
John Bauman19bac1e2014-05-06 15:23:49 -04003996 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003997 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01003998 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::sqrt, {V(val.value)->getType()});
John Bauman66b8ab22014-05-06 15:57:45 -04003999
Ben Clayton6f8e5652019-06-29 01:58:02 +01004000 return RValue<Float4>(V(jit->builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04004001 }
4002
John Bauman19bac1e2014-05-06 15:23:49 -04004003 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04004004 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004005 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04004006
Ben Clayton6f8e5652019-06-29 01:58:02 +01004007 return RValue<Float4>(V(jit->builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04004008 }
4009
John Bauman19bac1e2014-05-06 15:23:49 -04004010 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04004011 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004012 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04004013
Ben Clayton6f8e5652019-06-29 01:58:02 +01004014 return RValue<Float4>(V(jit->builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004015 }
4016
John Bauman19bac1e2014-05-06 15:23:49 -04004017 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04004018 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004019 llvm::Function *minps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04004020
Ben Clayton6f8e5652019-06-29 01:58:02 +01004021 return RValue<Float4>(V(jit->builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004022 }
4023
John Bauman19bac1e2014-05-06 15:23:49 -04004024 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04004025 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004026 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04004027
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04004028 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04004029 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
4030
Ben Clayton6f8e5652019-06-29 01:58:02 +01004031 return RValue<Float>(Nucleus::createExtractElement(V(jit->builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04004032 }
4033
John Bauman19bac1e2014-05-06 15:23:49 -04004034 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04004035 {
4036 return roundss(val, 1);
4037 }
4038
John Bauman19bac1e2014-05-06 15:23:49 -04004039 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04004040 {
4041 return roundss(val, 2);
4042 }
4043
John Bauman19bac1e2014-05-06 15:23:49 -04004044 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04004045 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004046 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04004047
Ben Clayton6f8e5652019-06-29 01:58:02 +01004048 return RValue<Float4>(V(jit->builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04004049 }
4050
John Bauman19bac1e2014-05-06 15:23:49 -04004051 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04004052 {
4053 return roundps(val, 1);
4054 }
4055
John Bauman19bac1e2014-05-06 15:23:49 -04004056 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04004057 {
4058 return roundps(val, 2);
4059 }
4060
Alexis Hetu0f448072016-03-18 10:56:08 -04004061 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04004062 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004063 return RValue<Int4>(V(lowerPABS(V(x.value))));
John Bauman89401822014-05-06 15:04:28 -04004064 }
4065
John Bauman19bac1e2014-05-06 15:23:49 -04004066 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004067 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004068 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04004069
Ben Clayton6f8e5652019-06-29 01:58:02 +01004070 return As<Short4>(V(jit->builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004071 }
John Bauman66b8ab22014-05-06 15:57:45 -04004072
John Bauman19bac1e2014-05-06 15:23:49 -04004073 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004074 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004075 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04004076
Ben Clayton6f8e5652019-06-29 01:58:02 +01004077 return As<Short4>(V(jit->builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004078 }
4079
John Bauman19bac1e2014-05-06 15:23:49 -04004080 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004081 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004082 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04004083
Ben Clayton6f8e5652019-06-29 01:58:02 +01004084 return As<UShort4>(V(jit->builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004085 }
John Bauman66b8ab22014-05-06 15:57:45 -04004086
John Bauman19bac1e2014-05-06 15:23:49 -04004087 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004088 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004089 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04004090
Ben Clayton6f8e5652019-06-29 01:58:02 +01004091 return As<UShort4>(V(jit->builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004092 }
4093
John Bauman19bac1e2014-05-06 15:23:49 -04004094 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04004095 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004096 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04004097
Ben Clayton6f8e5652019-06-29 01:58:02 +01004098 return As<SByte8>(V(jit->builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004099 }
John Bauman66b8ab22014-05-06 15:57:45 -04004100
John Bauman19bac1e2014-05-06 15:23:49 -04004101 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04004102 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004103 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04004104
Ben Clayton6f8e5652019-06-29 01:58:02 +01004105 return As<SByte8>(V(jit->builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004106 }
John Bauman66b8ab22014-05-06 15:57:45 -04004107
John Bauman19bac1e2014-05-06 15:23:49 -04004108 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04004109 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004110 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04004111
Ben Clayton6f8e5652019-06-29 01:58:02 +01004112 return As<Byte8>(V(jit->builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004113 }
John Bauman66b8ab22014-05-06 15:57:45 -04004114
John Bauman19bac1e2014-05-06 15:23:49 -04004115 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04004116 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004117 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04004118
Ben Clayton6f8e5652019-06-29 01:58:02 +01004119 return As<Byte8>(V(jit->builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04004120 }
4121
4122 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004123 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004124 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
John Bauman89401822014-05-06 15:04:28 -04004125 }
4126
John Bauman19bac1e2014-05-06 15:23:49 -04004127 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004128 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004129 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman89401822014-05-06 15:04:28 -04004130 }
4131
John Bauman19bac1e2014-05-06 15:23:49 -04004132 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004133 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004134 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman89401822014-05-06 15:04:28 -04004135 }
4136
John Bauman19bac1e2014-05-06 15:23:49 -04004137 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004138 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004139 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04004140 }
4141
John Bauman19bac1e2014-05-06 15:23:49 -04004142 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004143 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004144 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
John Bauman89401822014-05-06 15:04:28 -04004145 }
4146
John Bauman19bac1e2014-05-06 15:23:49 -04004147 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04004148 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004149 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04004150 }
4151
John Bauman19bac1e2014-05-06 15:23:49 -04004152 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04004153 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004154 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
John Bauman89401822014-05-06 15:04:28 -04004155 }
4156
John Bauman19bac1e2014-05-06 15:23:49 -04004157 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04004158 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004159 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04004160
Ben Clayton6f8e5652019-06-29 01:58:02 +01004161 return As<Short4>(V(jit->builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004162 }
4163
John Bauman19bac1e2014-05-06 15:23:49 -04004164 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04004165 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004166 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04004167
Ben Clayton6f8e5652019-06-29 01:58:02 +01004168 return RValue<Short8>(V(jit->builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004169 }
4170
John Bauman19bac1e2014-05-06 15:23:49 -04004171 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004172 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004173 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04004174
Ben Clayton6f8e5652019-06-29 01:58:02 +01004175 return As<SByte8>(V(jit->builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004176 }
4177
Nicolas Capens33438a62017-09-27 11:47:35 -04004178 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004179 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004180 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04004181
Ben Clayton6f8e5652019-06-29 01:58:02 +01004182 return As<Byte8>(V(jit->builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004183 }
4184
Nicolas Capens3e7062b2017-01-17 14:01:33 -05004185 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04004186 {
4187 if(CPUID::supportsSSE4_1())
4188 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004189 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04004190
Ben Clayton6f8e5652019-06-29 01:58:02 +01004191 return RValue<UShort8>(V(jit->builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004192 }
4193 else
4194 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05004195 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
4196 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
4197
4198 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04004199 }
4200 }
4201
John Bauman19bac1e2014-05-06 15:23:49 -04004202 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004203 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004204 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04004205
Ben Clayton6f8e5652019-06-29 01:58:02 +01004206 return As<UShort4>(V(jit->builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004207 }
4208
John Bauman19bac1e2014-05-06 15:23:49 -04004209 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004210 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004211 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04004212
Ben Clayton6f8e5652019-06-29 01:58:02 +01004213 return RValue<UShort8>(V(jit->builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004214 }
4215
John Bauman19bac1e2014-05-06 15:23:49 -04004216 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004217 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004218 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04004219
Ben Clayton6f8e5652019-06-29 01:58:02 +01004220 return As<Short4>(V(jit->builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004221 }
4222
John Bauman19bac1e2014-05-06 15:23:49 -04004223 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004224 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004225 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04004226
Ben Clayton6f8e5652019-06-29 01:58:02 +01004227 return RValue<Short8>(V(jit->builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004228 }
4229
John Bauman19bac1e2014-05-06 15:23:49 -04004230 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004231 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004232 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04004233
Ben Clayton6f8e5652019-06-29 01:58:02 +01004234 return As<Short4>(V(jit->builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004235 }
4236
John Bauman19bac1e2014-05-06 15:23:49 -04004237 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004238 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004239 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04004240
Ben Clayton6f8e5652019-06-29 01:58:02 +01004241 return RValue<Short8>(V(jit->builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004242 }
4243
John Bauman19bac1e2014-05-06 15:23:49 -04004244 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004245 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004246 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04004247
Ben Clayton6f8e5652019-06-29 01:58:02 +01004248 return As<Int2>(V(jit->builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004249 }
4250
John Bauman19bac1e2014-05-06 15:23:49 -04004251 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004252 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004253 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04004254
Ben Clayton6f8e5652019-06-29 01:58:02 +01004255 return RValue<Int4>(V(jit->builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004256 }
4257
John Bauman19bac1e2014-05-06 15:23:49 -04004258 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004259 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004260 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04004261
Ben Clayton6f8e5652019-06-29 01:58:02 +01004262 return As<Int2>(V(jit->builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004263 }
4264
John Bauman19bac1e2014-05-06 15:23:49 -04004265 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004266 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004267 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04004268
Ben Clayton6f8e5652019-06-29 01:58:02 +01004269 return RValue<Int4>(V(jit->builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004270 }
4271
John Bauman19bac1e2014-05-06 15:23:49 -04004272 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004273 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004274 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04004275
Ben Clayton6f8e5652019-06-29 01:58:02 +01004276 return As<UInt2>(V(jit->builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004277 }
4278
John Bauman19bac1e2014-05-06 15:23:49 -04004279 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04004280 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004281 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04004282
Ben Clayton6f8e5652019-06-29 01:58:02 +01004283 return RValue<UInt4>(V(jit->builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04004284 }
4285
John Bauman19bac1e2014-05-06 15:23:49 -04004286 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
4287 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004288 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004289 }
4290
4291 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
4292 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004293 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004294 }
4295
4296 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
4297 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004298 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004299 }
4300
4301 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
4302 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004303 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
John Bauman19bac1e2014-05-06 15:23:49 -04004304 }
4305
4306 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004307 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004308 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004309
Ben Clayton6f8e5652019-06-29 01:58:02 +01004310 return As<Short4>(V(jit->builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004311 }
4312
John Bauman19bac1e2014-05-06 15:23:49 -04004313 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04004314 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004315 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004316
Ben Clayton6f8e5652019-06-29 01:58:02 +01004317 return As<UShort4>(V(jit->builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004318 }
4319
John Bauman19bac1e2014-05-06 15:23:49 -04004320 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04004321 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004322 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004323
Ben Clayton6f8e5652019-06-29 01:58:02 +01004324 return As<Int2>(V(jit->builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004325 }
4326
John Bauman19bac1e2014-05-06 15:23:49 -04004327 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004328 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004329 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04004330
Ben Clayton6f8e5652019-06-29 01:58:02 +01004331 return RValue<Short8>(V(jit->builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004332 }
4333
John Bauman19bac1e2014-05-06 15:23:49 -04004334 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04004335 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004336 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04004337
Ben Clayton6f8e5652019-06-29 01:58:02 +01004338 return RValue<UShort8>(V(jit->builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004339 }
4340
John Bauman19bac1e2014-05-06 15:23:49 -04004341 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04004342 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004343 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04004344
Ben Clayton6f8e5652019-06-29 01:58:02 +01004345 return RValue<Int4>(V(jit->builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04004346 }
4347
John Bauman19bac1e2014-05-06 15:23:49 -04004348 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04004349 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004350 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04004351
Ben Clayton6f8e5652019-06-29 01:58:02 +01004352 return RValue<Int>(V(jit->builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04004353 }
4354
John Bauman19bac1e2014-05-06 15:23:49 -04004355 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04004356 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004357 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04004358
Ben Clayton6f8e5652019-06-29 01:58:02 +01004359 return RValue<Int>(V(jit->builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04004360 }
4361
Nicolas Capens01a97962017-07-28 17:30:51 -04004362 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04004363 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004364 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004365 }
4366
Nicolas Capens01a97962017-07-28 17:30:51 -04004367 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04004368 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004369 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004370 }
4371
Nicolas Capens01a97962017-07-28 17:30:51 -04004372 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04004373 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004374 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
John Bauman89401822014-05-06 15:04:28 -04004375 }
4376
Nicolas Capens01a97962017-07-28 17:30:51 -04004377 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04004378 {
Logan Chien0eedc8c2018-08-21 09:34:28 +08004379 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
John Bauman89401822014-05-06 15:04:28 -04004380 }
4381 }
Logan Chiene3191012018-08-24 22:01:50 +08004382#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004383
Ben Clayton60a3d6f2019-02-26 17:24:46 +00004384#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004385 // extractAll returns a vector containing the extracted n scalar value of
4386 // the vector vec.
4387 static std::vector<Value*> extractAll(Value* vec, int n)
4388 {
4389 std::vector<Value*> elements;
4390 elements.reserve(n);
4391 for (int i = 0; i < n; i++)
4392 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004393 auto el = V(jit->builder->CreateExtractElement(V(vec), i));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004394 elements.push_back(el);
4395 }
4396 return elements;
4397 }
4398
Ben Claytonca8e3d72019-05-14 16:51:05 +01004399 // toInt returns all the integer values in vals extended to a native width
4400 // integer.
4401 static std::vector<Value*> toInt(const std::vector<Value*>& vals, bool isSigned)
4402 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004403 auto intTy = ::llvm::Type::getIntNTy(jit->context, sizeof(int) * 8); // Natural integer width.
Ben Claytonca8e3d72019-05-14 16:51:05 +01004404 std::vector<Value*> elements;
4405 elements.reserve(vals.size());
4406 for (auto v : vals)
4407 {
4408 if (isSigned)
4409 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004410 elements.push_back(V(jit->builder->CreateSExt(V(v), intTy)));
Ben Claytonca8e3d72019-05-14 16:51:05 +01004411 }
4412 else
4413 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004414 elements.push_back(V(jit->builder->CreateZExt(V(v), intTy)));
Ben Claytonca8e3d72019-05-14 16:51:05 +01004415 }
4416 }
4417 return elements;
4418 }
4419
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004420 // toDouble returns all the float values in vals extended to doubles.
4421 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
4422 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004423 auto doubleTy = ::llvm::Type::getDoubleTy(jit->context);
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004424 std::vector<Value*> elements;
4425 elements.reserve(vals.size());
4426 for (auto v : vals)
4427 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004428 elements.push_back(V(jit->builder->CreateFPExt(V(v), doubleTy)));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004429 }
4430 return elements;
4431 }
4432
Ben Claytonca8e3d72019-05-14 16:51:05 +01004433 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return toInt(extractAll(v.value, 4), false); }
4434 std::vector<Value*> PrintValue::Ty<Int>::val(const RValue<Int>& v) { return toInt({v.value}, true); }
4435 std::vector<Value*> PrintValue::Ty<Int2>::val(const RValue<Int2>& v) { return toInt(extractAll(v.value, 2), true); }
4436 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return toInt(extractAll(v.value, 4), true); }
4437 std::vector<Value*> PrintValue::Ty<UInt>::val(const RValue<UInt>& v) { return toInt({v.value}, false); }
4438 std::vector<Value*> PrintValue::Ty<UInt2>::val(const RValue<UInt2>& v) { return toInt(extractAll(v.value, 2), false); }
4439 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return toInt(extractAll(v.value, 4), false); }
4440 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return toInt(extractAll(v.value, 4), true); }
4441 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return toInt(extractAll(v.value, 4), false); }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004442 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
4443 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
Ben Clayton6f8e5652019-06-29 01:58:02 +01004444 std::vector<Value*> PrintValue::Ty<const char*>::val(const char* v) { return {V(jit->builder->CreateGlobalStringPtr(v))}; }
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004445
4446 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
4447 {
4448 // LLVM types used below.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004449 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
4450 auto intTy = ::llvm::Type::getIntNTy(jit->context, sizeof(int) * 8); // Natural integer width.
4451 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(jit->context);
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004452 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
4453
Ben Clayton6f8e5652019-06-29 01:58:02 +01004454 auto func = jit->module->getOrInsertFunction("printf", funcTy);
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004455
4456 // Build the printf format message string.
4457 std::string str;
4458 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
4459 if (function != nullptr) { str += "%s "; }
4460 str += fmt;
4461
4462 // Perform subsitution on all '{n}' bracketed indices in the format
4463 // message.
4464 int i = 0;
4465 for (const PrintValue& arg : args)
4466 {
4467 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
4468 }
4469
4470 ::llvm::SmallVector<::llvm::Value*, 8> vals;
4471
4472 // The format message is always the first argument.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004473 vals.push_back(jit->builder->CreateGlobalStringPtr(str));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004474
4475 // Add optional file, line and function info if provided.
4476 if (file != nullptr)
4477 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004478 vals.push_back(jit->builder->CreateGlobalStringPtr(file));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004479 if (line > 0)
4480 {
4481 vals.push_back(::llvm::ConstantInt::get(intTy, line));
4482 }
4483 }
4484 if (function != nullptr)
4485 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004486 vals.push_back(jit->builder->CreateGlobalStringPtr(function));
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004487 }
4488
4489 // Add all format arguments.
4490 for (const PrintValue& arg : args)
4491 {
4492 for (auto val : arg.values)
4493 {
4494 vals.push_back(V(val));
4495 }
4496 }
4497
Ben Clayton6f8e5652019-06-29 01:58:02 +01004498 jit->builder->CreateCall(func, vals);
Ben Clayton1bc7ee92019-02-14 18:43:22 +00004499 }
4500#endif // ENABLE_RR_PRINT
4501
Ben Claytonac07ed82019-03-26 14:17:41 +00004502 void Nop()
4503 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004504 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
Ben Claytonac07ed82019-03-26 14:17:41 +00004505 auto funcTy = ::llvm::FunctionType::get(voidTy, {}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004506 auto func = jit->module->getOrInsertFunction("nop", funcTy);
4507 jit->builder->CreateCall(func);
Ben Claytonac07ed82019-03-26 14:17:41 +00004508 }
4509
// Forwards to DebugInfo::EmitLocation() when debug info is compiled in and a
// debug-info object is attached to the JIT; otherwise does nothing.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (jit->debugInfo == nullptr)
	{
		return;
	}

	jit->debugInfo->EmitLocation();
#endif // ENABLE_RR_DEBUG_INFO
}
4519
4520 void EmitDebugVariable(Value* value)
4521 {
4522#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton6f8e5652019-06-29 01:58:02 +01004523 if (jit->debugInfo != nullptr)
Ben Claytonac07ed82019-03-26 14:17:41 +00004524 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004525 jit->debugInfo->EmitVariable(value);
Ben Claytonac07ed82019-03-26 14:17:41 +00004526 }
4527#endif // ENABLE_RR_DEBUG_INFO
4528 }
4529
// Forwards to DebugInfo::Flush() when debug info is compiled in and a
// debug-info object is attached to the JIT; otherwise does nothing.
void FlushDebug()
{
#ifdef ENABLE_RR_DEBUG_INFO
	if (jit->debugInfo == nullptr)
	{
		return;
	}

	jit->debugInfo->Flush();
#endif // ENABLE_RR_DEBUG_INFO
}
4539
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004540} // namespace rr
4541
4542// ------------------------------ Coroutines ------------------------------
4543
4544namespace {
// Magic values returned by llvm.coro.suspend.
// See: https://llvm.org/docs/Coroutines.html#llvm-coro-suspend-intrinsic
enum SuspendAction
{
	SuspendActionSuspend = -1, // The coroutine is suspending.
	SuspendActionResume = 0,   // The coroutine has been resumed.
	SuspendActionDestroy = 1,  // The coroutine is being destroyed.
};
4553
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004554
Ben Clayton16da2812019-07-09 23:28:51 +01004555void promoteFunctionToCoroutine()
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004556{
Ben Clayton16da2812019-07-09 23:28:51 +01004557 ASSERT(jit->coroutine.id == nullptr);
4558
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004559 // Types
Ben Clayton6f8e5652019-06-29 01:58:02 +01004560 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
4561 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
4562 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
4563 auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
4564 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(jit->context);
Ben Clayton16da2812019-07-09 23:28:51 +01004565 auto promiseTy = jit->coroutine.yieldType;
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004566 auto promisePtrTy = promiseTy->getPointerTo();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004567
4568 // LLVM intrinsics
Ben Clayton6f8e5652019-06-29 01:58:02 +01004569 auto coro_id = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::coro_id);
4570 auto coro_size = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::coro_size, {i32Ty});
4571 auto coro_begin = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::coro_begin);
4572 auto coro_resume = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_resume);
4573 auto coro_end = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_end);
4574 auto coro_free = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_free);
4575 auto coro_destroy = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_destroy);
4576 auto coro_promise = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_promise);
4577 auto coro_done = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_done);
4578 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_suspend);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004579
4580 auto allocFrameTy = ::llvm::FunctionType::get(i8PtrTy, {i32Ty}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004581 auto allocFrame = jit->module->getOrInsertFunction("coroutine_alloc_frame", allocFrameTy);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004582 auto freeFrameTy = ::llvm::FunctionType::get(voidTy, {i8PtrTy}, false);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004583 auto freeFrame = jit->module->getOrInsertFunction("coroutine_free_frame", freeFrameTy);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004584
Ben Clayton16da2812019-07-09 23:28:51 +01004585 auto oldInsertionPoint = jit->builder->saveIP();
4586
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004587 // Build the coroutine_await() function:
4588 //
4589 // bool coroutine_await(CoroutineHandle* handle, YieldType* out)
4590 // {
4591 // if (llvm.coro.done(handle))
4592 // {
4593 // return false;
4594 // }
4595 // else
4596 // {
4597 // *value = (T*)llvm.coro.promise(handle);
4598 // llvm.coro.resume(handle);
4599 // return true;
4600 // }
4601 // }
4602 //
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004603 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004604 auto args = jit->coroutine.await->arg_begin();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004605 auto handle = args++;
4606 auto outPtr = args++;
Ben Clayton6f8e5652019-06-29 01:58:02 +01004607 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "co_await", jit->coroutine.await));
4608 auto doneBlock = llvm::BasicBlock::Create(jit->context, "done", jit->coroutine.await);
4609 auto resumeBlock = llvm::BasicBlock::Create(jit->context, "resume", jit->coroutine.await);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004610
Ben Clayton6f8e5652019-06-29 01:58:02 +01004611 auto done = jit->builder->CreateCall(coro_done, {handle}, "done");
4612 jit->builder->CreateCondBr(done, doneBlock, resumeBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004613
Ben Clayton6f8e5652019-06-29 01:58:02 +01004614 jit->builder->SetInsertPoint(doneBlock);
4615 jit->builder->CreateRet(::llvm::ConstantInt::getFalse(i1Ty));
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004616
Ben Clayton6f8e5652019-06-29 01:58:02 +01004617 jit->builder->SetInsertPoint(resumeBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004618 auto promiseAlignment = ::llvm::ConstantInt::get(i32Ty, 4); // TODO: Get correct alignment.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004619 auto promisePtr = jit->builder->CreateCall(coro_promise, {handle, promiseAlignment, ::llvm::ConstantInt::get(i1Ty, 0)});
4620 auto promise = jit->builder->CreateLoad(jit->builder->CreatePointerCast(promisePtr, promisePtrTy));
4621 jit->builder->CreateStore(promise, outPtr);
4622 jit->builder->CreateCall(coro_resume, {handle});
4623 jit->builder->CreateRet(::llvm::ConstantInt::getTrue(i1Ty));
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004624 }
4625
4626 // Build the coroutine_destroy() function:
4627 //
4628 // void coroutine_destroy(CoroutineHandle* handle)
4629 // {
4630 // llvm.coro.destroy(handle);
4631 // }
4632 //
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004633 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004634 auto handle = jit->coroutine.destroy->arg_begin();
4635 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->coroutine.destroy));
4636 jit->builder->CreateCall(coro_destroy, {handle});
4637 jit->builder->CreateRetVoid();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004638 }
4639
4640 // Begin building the main coroutine_begin() function.
4641 //
4642 // CoroutineHandle* coroutine_begin(<Arguments>)
4643 // {
4644 // YieldType promise;
4645 // auto id = llvm.coro.id(0, &promise, nullptr, nullptr);
4646 // void* frame = coroutine_alloc_frame(llvm.coro.size.i32());
4647 // CoroutineHandle *handle = llvm.coro.begin(id, frame);
4648 //
4649 // ... <REACTOR CODE> ...
4650 //
4651 // end:
4652 // SuspendAction action = llvm.coro.suspend(none, true /* final */); // <-- RESUME POINT
4653 // switch (action)
4654 // {
4655 // case SuspendActionResume:
4656 // UNREACHABLE(); // Illegal to resume after final suspend.
4657 // case SuspendActionDestroy:
4658 // goto destroy;
4659 // default: // (SuspendActionSuspend)
4660 // goto suspend;
4661 // }
4662 //
4663 // destroy:
4664 // coroutine_free_frame(llvm.coro.free(id, handle));
4665 // goto suspend;
4666 //
4667 // suspend:
4668 // llvm.coro.end(handle, false);
4669 // return handle;
4670 // }
4671 //
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004672
4673#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton16da2812019-07-09 23:28:51 +01004674 jit->debugInfo = std::unique_ptr<rr::DebugInfo>(new rr::DebugInfo(jit->builder.get(), &jit->context, jit->module.get(), jit->function));
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004675#endif // ENABLE_RR_DEBUG_INFO
4676
Ben Clayton6f8e5652019-06-29 01:58:02 +01004677 jit->coroutine.suspendBlock = llvm::BasicBlock::Create(jit->context, "suspend", jit->function);
4678 jit->coroutine.endBlock = llvm::BasicBlock::Create(jit->context, "end", jit->function);
4679 jit->coroutine.destroyBlock = llvm::BasicBlock::Create(jit->context, "destroy", jit->function);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004680
Ben Clayton16da2812019-07-09 23:28:51 +01004681 jit->builder->SetInsertPoint(jit->coroutine.entryBlock, jit->coroutine.entryBlock->begin());
4682 jit->coroutine.promise = jit->builder->CreateAlloca(promiseTy, nullptr, "promise");
Ben Clayton6f8e5652019-06-29 01:58:02 +01004683 jit->coroutine.id = jit->builder->CreateCall(coro_id, {
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004684 ::llvm::ConstantInt::get(i32Ty, 0),
Ben Clayton6f8e5652019-06-29 01:58:02 +01004685 jit->builder->CreatePointerCast(jit->coroutine.promise, i8PtrTy),
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004686 ::llvm::ConstantPointerNull::get(i8PtrTy),
4687 ::llvm::ConstantPointerNull::get(i8PtrTy),
4688 });
Ben Clayton6f8e5652019-06-29 01:58:02 +01004689 auto size = jit->builder->CreateCall(coro_size, {});
4690 auto frame = jit->builder->CreateCall(allocFrame, {size});
4691 jit->coroutine.handle = jit->builder->CreateCall(coro_begin, {jit->coroutine.id, frame});
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004692
4693 // Build the suspend block
Ben Clayton6f8e5652019-06-29 01:58:02 +01004694 jit->builder->SetInsertPoint(jit->coroutine.suspendBlock);
4695 jit->builder->CreateCall(coro_end, {jit->coroutine.handle, ::llvm::ConstantInt::get(i1Ty, 0)});
4696 jit->builder->CreateRet(jit->coroutine.handle);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004697
4698 // Build the end block
Ben Clayton6f8e5652019-06-29 01:58:02 +01004699 jit->builder->SetInsertPoint(jit->coroutine.endBlock);
4700 auto action = jit->builder->CreateCall(coro_suspend, {
4701 ::llvm::ConstantTokenNone::get(jit->context),
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004702 ::llvm::ConstantInt::get(i1Ty, 1), // final: true
4703 });
Ben Clayton6f8e5652019-06-29 01:58:02 +01004704 auto switch_ = jit->builder->CreateSwitch(action, jit->coroutine.suspendBlock, 3);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004705 // switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), trapBlock); // TODO: Trap attempting to resume after final suspend
Ben Clayton6f8e5652019-06-29 01:58:02 +01004706 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), jit->coroutine.destroyBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004707
4708 // Build the destroy block
Ben Clayton6f8e5652019-06-29 01:58:02 +01004709 jit->builder->SetInsertPoint(jit->coroutine.destroyBlock);
4710 auto memory = jit->builder->CreateCall(coro_free, {jit->coroutine.id, jit->coroutine.handle});
4711 jit->builder->CreateCall(freeFrame, {memory});
4712 jit->builder->CreateBr(jit->coroutine.suspendBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004713
Ben Clayton16da2812019-07-09 23:28:51 +01004714 // Switch back to original insert point to continue building the coroutine.
4715 jit->builder->restoreIP(oldInsertionPoint);
4716}
4717
4718} // anonymous namespace
4719
4720namespace rr {
4721
4722void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params)
4723{
4724 // Coroutines are initially created as a regular function.
4725 // Upon the first call to Yield(), the function is promoted to a true
4726 // coroutine.
4727 auto voidTy = ::llvm::Type::getVoidTy(jit->context);
4728 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
4729 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(jit->context);
4730 auto handleTy = i8PtrTy;
4731 auto boolTy = i1Ty;
4732 auto promiseTy = T(YieldType);
4733 auto promisePtrTy = promiseTy->getPointerTo();
4734
4735 jit->function = rr::createFunction("coroutine_begin", handleTy, T(Params));
4736 jit->coroutine.await = rr::createFunction("coroutine_await", boolTy, {handleTy, promisePtrTy});
4737 jit->coroutine.destroy = rr::createFunction("coroutine_destroy", voidTy, {handleTy});
4738 jit->coroutine.yieldType = promiseTy;
4739 jit->coroutine.entryBlock = llvm::BasicBlock::Create(jit->context, "function", jit->function);
4740
4741 jit->builder->SetInsertPoint(jit->coroutine.entryBlock);
John Bauman89401822014-05-06 15:04:28 -04004742}
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004743
4744void Nucleus::yield(Value* val)
4745{
Ben Clayton16da2812019-07-09 23:28:51 +01004746 if (jit->coroutine.id == nullptr)
4747 {
4748 // First call to yield().
4749 // Promote the function to a full coroutine.
4750 promoteFunctionToCoroutine();
4751 ASSERT(jit->coroutine.id != nullptr);
4752 }
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004753
4754 // promise = val;
4755 //
4756 // auto action = llvm.coro.suspend(none, false /* final */); // <-- RESUME POINT
4757 // switch (action)
4758 // {
4759 // case SuspendActionResume:
4760 // goto resume;
4761 // case SuspendActionDestroy:
4762 // goto destroy;
4763 // default: // (SuspendActionSuspend)
4764 // goto suspend;
4765 // }
4766 // resume:
4767 //
4768
4769 RR_DEBUG_INFO_UPDATE_LOC();
4770 Variable::materializeAll();
4771
4772 // Types
Ben Clayton6f8e5652019-06-29 01:58:02 +01004773 auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
4774 auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004775
4776 // Intrinsics
Ben Clayton6f8e5652019-06-29 01:58:02 +01004777 auto coro_suspend = ::llvm::Intrinsic::getDeclaration(jit->module.get(), ::llvm::Intrinsic::coro_suspend);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004778
4779 // Create a block to resume execution.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004780 auto resumeBlock = llvm::BasicBlock::Create(jit->context, "resume", jit->function);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004781
4782 // Store the promise (yield value)
Ben Clayton6f8e5652019-06-29 01:58:02 +01004783 jit->builder->CreateStore(V(val), jit->coroutine.promise);
4784 auto action = jit->builder->CreateCall(coro_suspend, {
4785 ::llvm::ConstantTokenNone::get(jit->context),
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004786 ::llvm::ConstantInt::get(i1Ty, 0), // final: true
4787 });
Ben Clayton6f8e5652019-06-29 01:58:02 +01004788 auto switch_ = jit->builder->CreateSwitch(action, jit->coroutine.suspendBlock, 3);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004789 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionResume), resumeBlock);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004790 switch_->addCase(::llvm::ConstantInt::get(i8Ty, SuspendActionDestroy), jit->coroutine.destroyBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004791
4792 // Continue building in the resume block.
Ben Clayton6f8e5652019-06-29 01:58:02 +01004793 jit->builder->SetInsertPoint(resumeBlock);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004794}
4795
Ben Clayton6897e9b2019-07-16 17:27:27 +01004796std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004797{
Ben Clayton16da2812019-07-09 23:28:51 +01004798 bool isCoroutine = jit->coroutine.id != nullptr;
4799 if (isCoroutine)
4800 {
4801 jit->builder->CreateBr(jit->coroutine.endBlock);
4802 }
4803 else
4804 {
4805 // Coroutine without a Yield acts as a regular function.
4806 // The 'coroutine_begin' function returns a nullptr for the coroutine
4807 // handle.
4808 jit->builder->CreateRet(llvm::Constant::getNullValue(jit->function->getReturnType()));
4809 // The 'coroutine_await' function always returns false (coroutine done).
4810 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->coroutine.await));
4811 jit->builder->CreateRet(llvm::Constant::getNullValue(jit->coroutine.await->getReturnType()));
4812 // The 'coroutine_destroy' does nothing, returns void.
4813 jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->coroutine.destroy));
4814 jit->builder->CreateRetVoid();
4815 }
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004816
4817#ifdef ENABLE_RR_DEBUG_INFO
Ben Clayton6f8e5652019-06-29 01:58:02 +01004818 if (jit->debugInfo != nullptr)
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004819 {
Ben Clayton6f8e5652019-06-29 01:58:02 +01004820 jit->debugInfo->Finalize();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004821 }
4822#endif // ENABLE_RR_DEBUG_INFO
4823
4824 if(false)
4825 {
4826 std::error_code error;
4827 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004828 jit->module->print(file, 0);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004829 }
4830
Ben Clayton16da2812019-07-09 23:28:51 +01004831 if (isCoroutine)
4832 {
4833 // Run manadory coroutine transforms.
4834 llvm::legacy::PassManager pm;
4835 pm.add(llvm::createCoroEarlyPass());
4836 pm.add(llvm::createCoroSplitPass());
4837 pm.add(llvm::createCoroElidePass());
4838 pm.add(llvm::createBarrierNoopPass());
4839 pm.add(llvm::createCoroCleanupPass());
4840 pm.run(*jit->module);
4841 }
4842
4843#if defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
4844 {
4845 llvm::legacy::PassManager pm;
4846 pm.add(llvm::createVerifierPass());
4847 pm.run(*jit->module);
4848 }
4849#endif // defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004850
Ben Clayton55bc37a2019-07-04 12:17:12 +01004851 auto cfg = cfgEdit.apply(jit->config);
4852 jit->optimize(cfg);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004853
4854 if(false)
4855 {
4856 std::error_code error;
4857 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004858 jit->module->print(file, 0);
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004859 }
4860
4861 llvm::Function *funcs[Nucleus::CoroutineEntryCount];
Ben Clayton6f8e5652019-06-29 01:58:02 +01004862 funcs[Nucleus::CoroutineEntryBegin] = jit->function;
4863 funcs[Nucleus::CoroutineEntryAwait] = jit->coroutine.await;
4864 funcs[Nucleus::CoroutineEntryDestroy] = jit->coroutine.destroy;
Ben Clayton55bc37a2019-07-04 12:17:12 +01004865 auto routine = jit->acquireRoutine(funcs, Nucleus::CoroutineEntryCount, cfg);
Ben Clayton6f8e5652019-06-29 01:58:02 +01004866 jit.reset();
Ben Clayton1c82c7b2019-04-30 12:49:27 +01004867
4868 return routine;
4869}
4870
4871} // namespace rr