blob: c04b300a19caa1771568c1a9c03a2a46d80a5d79 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
Nicolas Capenscb986762017-01-20 11:34:37 -050015#include "Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040016
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040017#include "x86.hpp"
18#include "CPUID.hpp"
19#include "Thread.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040020#include "ExecutableMemory.hpp"
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040021#include "MutexLock.hpp"
22
23#undef min
24#undef max
25
Nicolas Capensf417d9d2018-10-10 10:49:30 -040026#if REACTOR_LLVM_VERSION < 7
Logan Chien0eedc8c2018-08-21 09:34:28 +080027 #include "llvm/Analysis/LoopPass.h"
28 #include "llvm/Constants.h"
29 #include "llvm/Function.h"
30 #include "llvm/GlobalVariable.h"
31 #include "llvm/Intrinsics.h"
32 #include "llvm/LLVMContext.h"
33 #include "llvm/Module.h"
34 #include "llvm/PassManager.h"
35 #include "llvm/Support/IRBuilder.h"
36 #include "llvm/Support/TargetSelect.h"
37 #include "llvm/Target/TargetData.h"
38 #include "llvm/Target/TargetOptions.h"
39 #include "llvm/Transforms/Scalar.h"
40 #include "../lib/ExecutionEngine/JIT/JIT.h"
John Bauman89401822014-05-06 15:04:28 -040041
Logan Chien0eedc8c2018-08-21 09:34:28 +080042 #include "LLVMRoutine.hpp"
43 #include "LLVMRoutineManager.hpp"
44
45 #define ARGS(...) __VA_ARGS__
46#else
47 #include "llvm/Analysis/LoopPass.h"
48 #include "llvm/ExecutionEngine/ExecutionEngine.h"
49 #include "llvm/ExecutionEngine/JITSymbol.h"
50 #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
51 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
52 #include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
53 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
54 #include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
55 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
56 #include "llvm/IR/Constants.h"
57 #include "llvm/IR/DataLayout.h"
58 #include "llvm/IR/Function.h"
59 #include "llvm/IR/GlobalVariable.h"
60 #include "llvm/IR/IRBuilder.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/LegacyPassManager.h"
Nicolas Capensadfbbcb2018-10-31 14:38:53 -040064 #include "llvm/IR/Mangler.h"
Logan Chien0eedc8c2018-08-21 09:34:28 +080065 #include "llvm/IR/Module.h"
66 #include "llvm/Support/Error.h"
67 #include "llvm/Support/TargetSelect.h"
68 #include "llvm/Target/TargetOptions.h"
69 #include "llvm/Transforms/InstCombine/InstCombine.h"
70 #include "llvm/Transforms/Scalar.h"
71 #include "llvm/Transforms/Scalar/GVN.h"
72
73 #include "LLVMRoutine.hpp"
74
75 #define ARGS(...) {__VA_ARGS__}
76 #define CreateCall2 CreateCall
77 #define CreateCall3 CreateCall
Logan Chien40a60052018-09-26 19:03:53 +080078
79 #include <unordered_map>
Logan Chien0eedc8c2018-08-21 09:34:28 +080080#endif
81
John Bauman89401822014-05-06 15:04:28 -040082#include <fstream>
Ben Clayton1bc7ee92019-02-14 18:43:22 +000083#include <numeric>
84#include <thread>
John Bauman89401822014-05-06 15:04:28 -040085
Nicolas Capens47dc8672017-04-25 12:54:39 -040086#if defined(__i386__) || defined(__x86_64__)
87#include <xmmintrin.h>
88#endif
89
Logan Chien40a60052018-09-26 19:03:53 +080090#include <math.h>
91
Nicolas Capenscb122582014-05-06 23:34:44 -040092#if defined(__x86_64__) && defined(_WIN32)
// Placeholder definition with C linkage. Nothing is implemented here, so
// reaching this function is treated as a programming error.
extern "C" void X86CompilationCallback()
{
	assert(false);   // UNIMPLEMENTED
}
97#endif
98
Nicolas Capensf417d9d2018-10-10 10:49:30 -040099#if REACTOR_LLVM_VERSION < 7
// Declares a flag that is defined elsewhere, inside the llvm namespace, so
// that this file can refer to it.
namespace llvm
{
	extern bool JITEmitDebugInfo;
}
Logan Chien0eedc8c2018-08-21 09:34:28 +0800104#endif
John Bauman89401822014-05-06 15:04:28 -0400105
// Forward declaration: the file-local state below keeps a pointer to the JIT
// before the class itself is defined.
namespace rr
{
	class LLVMReactorJIT;
}
110
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400111namespace
112{
Nicolas Capens48461502018-08-06 14:20:45 -0400113 rr::LLVMReactorJIT *reactorJIT = nullptr;
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400114 llvm::IRBuilder<> *builder = nullptr;
115 llvm::LLVMContext *context = nullptr;
116 llvm::Module *module = nullptr;
117 llvm::Function *function = nullptr;
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400118
Nicolas Capensc07dc4b2018-08-06 14:20:45 -0400119 rr::MutexLock codegenMutex;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800120
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000121#ifdef ENABLE_RR_PRINT
// Returns a copy of `str` in which every non-overlapping occurrence of
// `substr` has been replaced by `replacement`, scanning left to right.
// Text inserted by a replacement is never rescanned, so a replacement that
// itself contains `substr` does not recurse.
//
// An empty `substr` matches nothing and `str` is returned unchanged. Without
// this guard the loop would never terminate, because std::string::find()
// reports a match for the empty string at every position.
std::string replace(std::string str, const std::string& substr, const std::string& replacement)
{
	if(substr.empty())
	{
		return str;
	}

	for(size_t pos = str.find(substr); pos != std::string::npos; pos = str.find(substr, pos))
	{
		str.replace(pos, substr.length(), replacement);
		pos += replacement.length();   // Resume the search after the inserted text.
	}

	return str;
}
Ben Clayton60a3d6f2019-02-26 17:24:46 +0000131#endif // ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +0000132
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400133#if REACTOR_LLVM_VERSION >= 7
Logan Chien0eedc8c2018-08-21 09:34:28 +0800134 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
135 {
136 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
137
138 llvm::VectorType *extTy =
139 llvm::VectorType::getExtendedElementVectorType(ty);
140 x = ::builder->CreateZExt(x, extTy);
141 y = ::builder->CreateZExt(y, extTy);
142
143 // (x + y + 1) >> 1
144 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
145 llvm::Value *res = ::builder->CreateAdd(x, y);
146 res = ::builder->CreateAdd(res, one);
147 res = ::builder->CreateLShr(res, one);
148 return ::builder->CreateTrunc(res, ty);
149 }
150
151 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800152 llvm::ICmpInst::Predicate pred)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800153 {
154 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
155 }
156
157 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
Logan Chienb5ce5092018-09-27 18:45:58 +0800158 llvm::Value *y, llvm::Type *dstTy)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800159 {
160 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
161 }
162
Logan Chiene3191012018-08-24 22:01:50 +0800163#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800164 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
165 {
166 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
167 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
168
169 llvm::Value *undef = llvm::UndefValue::get(srcTy);
170 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
171 std::iota(mask.begin(), mask.end(), 0);
172 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
173
174 return sext ? ::builder->CreateSExt(v, dstTy)
Logan Chienb5ce5092018-09-27 18:45:58 +0800175 : ::builder->CreateZExt(v, dstTy);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800176 }
177
178 llvm::Value *lowerPABS(llvm::Value *v)
179 {
180 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
181 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
182 llvm::Value *neg = ::builder->CreateNeg(v);
183 return ::builder->CreateSelect(cmp, v, neg);
184 }
185#endif // defined(__i386__) || defined(__x86_64__)
Logan Chiene3191012018-08-24 22:01:50 +0800186
187#if !defined(__i386__) && !defined(__x86_64__)
188 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
Logan Chienb5ce5092018-09-27 18:45:58 +0800189 llvm::FCmpInst::Predicate pred)
Logan Chiene3191012018-08-24 22:01:50 +0800190 {
191 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
192 }
193
Logan Chien83fc07a2018-09-26 22:14:00 +0800194 llvm::Value *lowerRound(llvm::Value *x)
195 {
196 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
197 ::module, llvm::Intrinsic::nearbyint, {x->getType()});
198 return ::builder->CreateCall(nearbyint, ARGS(x));
199 }
200
Logan Chien2faa24a2018-09-26 19:59:32 +0800201 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
202 {
203 return ::builder->CreateFPToSI(lowerRound(x), ty);
204 }
205
Logan Chien40a60052018-09-26 19:03:53 +0800206 llvm::Value *lowerFloor(llvm::Value *x)
207 {
208 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
209 ::module, llvm::Intrinsic::floor, {x->getType()});
210 return ::builder->CreateCall(floor, ARGS(x));
211 }
212
Logan Chien8c5ca8d2018-09-27 21:05:53 +0800213 llvm::Value *lowerTrunc(llvm::Value *x)
214 {
215 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
216 ::module, llvm::Intrinsic::trunc, {x->getType()});
217 return ::builder->CreateCall(trunc, ARGS(x));
218 }
219
Logan Chiene3191012018-08-24 22:01:50 +0800220 // Packed add/sub saturatation
Logan Chien28794cf2018-09-26 18:58:03 +0800221 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
Logan Chiene3191012018-08-24 22:01:50 +0800222 {
Logan Chien28794cf2018-09-26 18:58:03 +0800223 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
224 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
225
226 unsigned numBits = ty->getScalarSizeInBits();
227
228 llvm::Value *max, *min, *extX, *extY;
229 if (isSigned)
230 {
231 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
232 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
233 extX = ::builder->CreateSExt(x, extTy);
234 extY = ::builder->CreateSExt(y, extTy);
235 }
236 else
237 {
238 assert(numBits <= 64);
239 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
240 max = llvm::ConstantInt::get(extTy, maxVal, false);
241 min = llvm::ConstantInt::get(extTy, 0, false);
242 extX = ::builder->CreateZExt(x, extTy);
243 extY = ::builder->CreateZExt(y, extTy);
244 }
245
246 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
247 : ::builder->CreateSub(extX, extY);
248
249 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
250 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
251
252 return ::builder->CreateTrunc(res, ty);
Logan Chiene3191012018-08-24 22:01:50 +0800253 }
254
255 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
256 {
Logan Chien28794cf2018-09-26 18:58:03 +0800257 return lowerPSAT(x, y, true, false);
Logan Chiene3191012018-08-24 22:01:50 +0800258 }
259
260 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
261 {
Logan Chien28794cf2018-09-26 18:58:03 +0800262 return lowerPSAT(x, y, true, true);
Logan Chiene3191012018-08-24 22:01:50 +0800263 }
264
265 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
266 {
Logan Chien28794cf2018-09-26 18:58:03 +0800267 return lowerPSAT(x, y, false, false);
Logan Chiene3191012018-08-24 22:01:50 +0800268 }
269
270 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
271 {
Logan Chien28794cf2018-09-26 18:58:03 +0800272 return lowerPSAT(x, y, false, true);
Logan Chiene3191012018-08-24 22:01:50 +0800273 }
274
275 llvm::Value *lowerSQRT(llvm::Value *x)
276 {
277 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
278 ::module, llvm::Intrinsic::sqrt, {x->getType()});
279 return ::builder->CreateCall(sqrt, ARGS(x));
280 }
281
282 llvm::Value *lowerRCP(llvm::Value *x)
283 {
284 llvm::Type *ty = x->getType();
285 llvm::Constant *one;
286 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
287 {
288 one = llvm::ConstantVector::getSplat(
289 vectorTy->getNumElements(),
290 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
291 }
292 else
293 {
294 one = llvm::ConstantFP::get(ty, 1);
295 }
296 return ::builder->CreateFDiv(one, x);
297 }
298
299 llvm::Value *lowerRSQRT(llvm::Value *x)
300 {
301 return lowerRCP(lowerSQRT(x));
302 }
303
304 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
305 {
306 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
307 llvm::Value *y = llvm::ConstantVector::getSplat(
308 ty->getNumElements(),
309 llvm::ConstantInt::get(ty->getElementType(), scalarY));
310 return ::builder->CreateShl(x, y);
311 }
312
313 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
314 {
315 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
316 llvm::Value *y = llvm::ConstantVector::getSplat(
317 ty->getNumElements(),
318 llvm::ConstantInt::get(ty->getElementType(), scalarY));
319 return ::builder->CreateAShr(x, y);
320 }
321
322 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
323 {
324 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
325 llvm::Value *y = llvm::ConstantVector::getSplat(
326 ty->getNumElements(),
327 llvm::ConstantInt::get(ty->getElementType(), scalarY));
328 return ::builder->CreateLShr(x, y);
329 }
330
331 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
332 {
333 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
334 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
335
336 llvm::Value *extX = ::builder->CreateSExt(x, extTy);
337 llvm::Value *extY = ::builder->CreateSExt(y, extTy);
338 llvm::Value *mult = ::builder->CreateMul(extX, extY);
339
340 llvm::Value *undef = llvm::UndefValue::get(extTy);
341
342 llvm::SmallVector<uint32_t, 16> evenIdx;
343 llvm::SmallVector<uint32_t, 16> oddIdx;
344 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
345 {
346 evenIdx.push_back(i);
347 oddIdx.push_back(i + 1);
348 }
349
350 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
351 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
352 return ::builder->CreateAdd(lhs, rhs);
353 }
354
Logan Chiene3191012018-08-24 22:01:50 +0800355 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
356 {
357 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
358 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
359
360 llvm::IntegerType *dstElemTy =
361 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
362
363 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
364 assert(truncNumBits < 64 && "shift 64 must be handled separately");
365 llvm::Constant *max, *min;
366 if (isSigned)
367 {
368 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
369 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
370 }
371 else
372 {
373 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
374 min = llvm::ConstantInt::get(srcTy, 0, false);
375 }
376
377 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
378 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
379 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
380 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
381
382 x = ::builder->CreateTrunc(x, dstTy);
383 y = ::builder->CreateTrunc(y, dstTy);
384
385 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
386 std::iota(index.begin(), index.end(), 0);
387
388 return ::builder->CreateShuffleVector(x, y, index);
389 }
390
391 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
392 {
393 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
394 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
395 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
396
397 llvm::Value *ret = ::builder->CreateZExt(
398 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
399 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
400 {
401 llvm::Value *elem = ::builder->CreateZExt(
402 ::builder->CreateExtractElement(cmp, i), retTy);
403 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
404 }
405 return ret;
406 }
407
408 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
409 {
410 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
411 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
412 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
413
414 llvm::Value *ret = ::builder->CreateZExt(
415 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
416 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
417 {
418 llvm::Value *elem = ::builder->CreateZExt(
419 ::builder->CreateExtractElement(cmp, i), retTy);
420 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
421 }
422 return ret;
423 }
424#endif // !defined(__i386__) && !defined(__x86_64__)
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400425#endif // REACTOR_LLVM_VERSION >= 7
Chris Forbese86b6dc2019-03-01 09:08:47 -0800426
427 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
428 {
429 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
430 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
431
432 llvm::Value *extX, *extY;
433 if (sext)
434 {
435 extX = ::builder->CreateSExt(x, extTy);
436 extY = ::builder->CreateSExt(y, extTy);
437 }
438 else
439 {
440 extX = ::builder->CreateZExt(x, extTy);
441 extY = ::builder->CreateZExt(y, extTy);
442 }
443
444 llvm::Value *mult = ::builder->CreateMul(extX, extY);
445
446 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
447 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getBitWidth());
448 return ::builder->CreateTrunc(mulh, ty);
449 }
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400450}
451
Nicolas Capens48461502018-08-06 14:20:45 -0400452namespace rr
John Bauman89401822014-05-06 15:04:28 -0400453{
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400454#if REACTOR_LLVM_VERSION < 7
Logan Chien52cde602018-09-03 19:37:57 +0800455 class LLVMReactorJIT
456 {
457 private:
458 std::string arch;
459 llvm::SmallVector<std::string, 16> mattrs;
Logan Chien52cde602018-09-03 19:37:57 +0800460 llvm::ExecutionEngine *executionEngine;
Nicolas Capens48461502018-08-06 14:20:45 -0400461 LLVMRoutineManager *routineManager;
Logan Chien52cde602018-09-03 19:37:57 +0800462
463 public:
464 LLVMReactorJIT(const std::string &arch_,
Logan Chienb5ce5092018-09-27 18:45:58 +0800465 const llvm::SmallVectorImpl<std::string> &mattrs_) :
Logan Chien52cde602018-09-03 19:37:57 +0800466 arch(arch_),
467 mattrs(mattrs_.begin(), mattrs_.end()),
Nicolas Capens48461502018-08-06 14:20:45 -0400468 executionEngine(nullptr),
469 routineManager(nullptr)
Logan Chien52cde602018-09-03 19:37:57 +0800470 {
471 }
472
473 void startSession()
474 {
475 std::string error;
476
477 ::module = new llvm::Module("", *::context);
478
479 routineManager = new LLVMRoutineManager();
480
481 llvm::TargetMachine *targetMachine =
482 llvm::EngineBuilder::selectTarget(
483 ::module, arch, "", mattrs, llvm::Reloc::Default,
484 llvm::CodeModel::JITDefault, &error);
485
486 executionEngine = llvm::JIT::createJIT(
487 ::module, &error, routineManager, llvm::CodeGenOpt::Aggressive,
488 true, targetMachine);
489 }
490
491 void endSession()
492 {
493 delete executionEngine;
494 executionEngine = nullptr;
495 routineManager = nullptr;
496
497 ::function = nullptr;
498 ::module = nullptr;
499 }
500
501 LLVMRoutine *acquireRoutine(llvm::Function *func)
502 {
503 void *entry = executionEngine->getPointerToFunction(::function);
504 return routineManager->acquireRoutine(entry);
505 }
506
507 void optimize(llvm::Module *module)
508 {
509 static llvm::PassManager *passManager = nullptr;
510
511 if(!passManager)
512 {
513 passManager = new llvm::PassManager();
514
515 passManager->add(new llvm::TargetData(*executionEngine->getTargetData()));
516 passManager->add(llvm::createScalarReplAggregatesPass());
517
518 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
519 {
520 switch(optimization[pass])
521 {
522 case Disabled: break;
523 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
524 case LICM: passManager->add(llvm::createLICMPass()); break;
525 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
526 case GVN: passManager->add(llvm::createGVNPass()); break;
527 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
528 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
529 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
530 case SCCP: passManager->add(llvm::createSCCPPass()); break;
531 case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break;
532 default:
533 assert(false);
534 }
535 }
536 }
537
538 passManager->run(*::module);
539 }
540 };
Logan Chien0eedc8c2018-08-21 09:34:28 +0800541#else
// Maps the names of C library functions that generated code may call to the
// addresses of those functions in the host process.
class ExternalFunctionSymbolResolver
{
private:
	using FunctionMap = std::unordered_map<std::string, void *>;
	FunctionMap func_;

public:
	// Registers every supported function. Keys are stored without leading
	// underscores because findSymbol() strips them before the lookup.
	ExternalFunctionSymbolResolver()
	{
		func_.emplace("floorf", reinterpret_cast<void*>(floorf));
		func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
		func_.emplace("truncf", reinterpret_cast<void*>(truncf));
		func_.emplace("printf", reinterpret_cast<void*>(printf));
		func_.emplace("puts", reinterpret_cast<void*>(puts));
		func_.emplace("fmodf", reinterpret_cast<void*>(fmodf));
		func_.emplace("sinf", reinterpret_cast<void*>(sinf));
		func_.emplace("cosf", reinterpret_cast<void*>(cosf));
		func_.emplace("asinf", reinterpret_cast<void*>(asinf));
		func_.emplace("acosf", reinterpret_cast<void*>(acosf));
		func_.emplace("atanf", reinterpret_cast<void*>(atanf));
		func_.emplace("sinhf", reinterpret_cast<void*>(sinhf));
		func_.emplace("coshf", reinterpret_cast<void*>(coshf));
		func_.emplace("tanhf", reinterpret_cast<void*>(tanhf));
		func_.emplace("asinhf", reinterpret_cast<void*>(asinhf));
		func_.emplace("acoshf", reinterpret_cast<void*>(acoshf));
		func_.emplace("atanhf", reinterpret_cast<void*>(atanhf));
		func_.emplace("atan2f", reinterpret_cast<void*>(atan2f));
		func_.emplace("powf", reinterpret_cast<void*>(powf));

#ifdef __APPLE__
		// LLVM uses this function on macOS for tan.
		func_.emplace("sincosf_stret", reinterpret_cast<void*>(__sincosf_stret));
#elif defined(__linux__)
		func_.emplace("sincosf", reinterpret_cast<void*>(sincosf));
#endif // __APPLE__
	}

	// Returns the address registered for `name`, ignoring any leading
	// underscores. An unknown name trips the assert in debug builds; when
	// asserts are compiled out, nullptr is returned instead of dereferencing
	// the end iterator.
	void *findSymbol(const std::string &name) const
	{
		// Trim off any underscores from the start of the symbol. LLVM likes
		// to prepend these on macOS.
		const char* trimmed = name.c_str();
		while (trimmed[0] == '_') { trimmed++; }

		FunctionMap::const_iterator it = func_.find(trimmed);
		assert(it != func_.end()); // Missing functions will likely make the module fail in exciting non-obvious ways.
		if(it == func_.end())
		{
			return nullptr;
		}

		return it->second;
	}
};
591
Logan Chien0eedc8c2018-08-21 09:34:28 +0800592 class LLVMReactorJIT
593 {
594 private:
595 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
596 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
597
598 llvm::orc::ExecutionSession session;
Logan Chien40a60052018-09-26 19:03:53 +0800599 ExternalFunctionSymbolResolver externalSymbolResolver;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800600 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
601 std::unique_ptr<llvm::TargetMachine> targetMachine;
602 const llvm::DataLayout dataLayout;
603 ObjLayer objLayer;
604 CompileLayer compileLayer;
605 size_t emittedFunctionsNum;
606
607 public:
608 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
609 const llvm::TargetOptions &targetOpts):
610 resolver(createLegacyLookupResolver(
611 session,
612 [this](const std::string &name) {
Logan Chien40a60052018-09-26 19:03:53 +0800613 void *func = externalSymbolResolver.findSymbol(name);
614 if (func != nullptr)
615 {
616 return llvm::JITSymbol(
617 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
618 }
619
Logan Chien0eedc8c2018-08-21 09:34:28 +0800620 return objLayer.findSymbol(name, true);
621 },
622 [](llvm::Error err) {
623 if (err)
624 {
625 // TODO: Log the symbol resolution errors.
626 return;
627 }
628 })),
629 targetMachine(llvm::EngineBuilder()
630 .setMArch(arch)
631 .setMAttrs(mattrs)
632 .setTargetOptions(targetOpts)
633 .selectTarget()),
634 dataLayout(targetMachine->createDataLayout()),
635 objLayer(
636 session,
637 [this](llvm::orc::VModuleKey) {
638 return ObjLayer::Resources{
639 std::make_shared<llvm::SectionMemoryManager>(),
640 resolver};
641 }),
642 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
643 emittedFunctionsNum(0)
644 {
645 }
646
647 void startSession()
648 {
649 ::module = new llvm::Module("", *::context);
650 }
651
652 void endSession()
653 {
654 ::function = nullptr;
655 ::module = nullptr;
656 }
657
658 LLVMRoutine *acquireRoutine(llvm::Function *func)
659 {
660 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
661 func->setName(name);
662 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
663 func->setDoesNotThrow();
664
665 std::unique_ptr<llvm::Module> mod(::module);
666 ::module = nullptr;
667 mod->setDataLayout(dataLayout);
668
669 auto moduleKey = session.allocateVModule();
670 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
671
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400672 std::string mangledName;
673 {
674 llvm::raw_string_ostream mangledNameStream(mangledName);
675 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
676 }
677
678 llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledName, false);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800679
680 llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress();
Nicolas Capensadfbbcb2018-10-31 14:38:53 -0400681 if(!expectAddr)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800682 {
683 return nullptr;
684 }
685
686 void *addr = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
687 return new LLVMRoutine(addr, releaseRoutineCallback, this, moduleKey);
688 }
689
690 void optimize(llvm::Module *module)
691 {
692 std::unique_ptr<llvm::legacy::PassManager> passManager(
693 new llvm::legacy::PassManager());
694
695 passManager->add(llvm::createSROAPass());
696
697 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
698 {
699 switch(optimization[pass])
700 {
701 case Disabled: break;
702 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
703 case LICM: passManager->add(llvm::createLICMPass()); break;
704 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
705 case GVN: passManager->add(llvm::createGVNPass()); break;
706 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
707 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
708 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
709 case SCCP: passManager->add(llvm::createSCCPPass()); break;
710 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
711 default:
Logan Chienb5ce5092018-09-27 18:45:58 +0800712 assert(false);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800713 }
714 }
715
716 passManager->run(*::module);
717 }
718
719 private:
720 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
721 {
722 llvm::cantFail(compileLayer.removeModule(moduleKey));
723 }
724
725 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
726 {
727 jit->releaseRoutineModule(moduleKey);
728 }
729 };
730#endif
Logan Chien52cde602018-09-03 19:37:57 +0800731
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400732 Optimization optimization[10] = {InstructionCombining, Disabled};
John Bauman89401822014-05-06 15:04:28 -0400733
// The abstract Type* values are implemented as LLVM type pointers, with one
// exception: 64-bit vectors are emulated using 128-bit ones. This avoids the
// use of MMX on x86 and VFP on ARM, and eliminates the overhead of converting
// them to explicit 128-bit vectors. Because real LLVM types are pointers, the
// emulated types can be represented as abstract pointers holding small enum
// values.
enum InternalType : uintptr_t
{
	// Emulated types:
	Type_v2i32,
	Type_v4i16,
	Type_v2i16,
	Type_v8i8,
	Type_v4i8,
	Type_v2f32,
	EmulatedTypeCount,   // Number of emulated types listed above

	// Returned by asInternalType() to indicate that the abstract Type*
	// should be interpreted as an LLVM type pointer:
	Type_LLVM
};
753
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500754 inline InternalType asInternalType(Type *type)
755 {
756 InternalType t = static_cast<InternalType>(reinterpret_cast<uintptr_t>(type));
757 return (t < EmulatedTypeCount) ? t : Type_LLVM;
758 }
759
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400760 llvm::Type *T(Type *t)
761 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500762 // Use 128-bit vectors to implement logically shorter ones.
763 switch(asInternalType(t))
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400764 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500765 case Type_v2i32: return T(Int4::getType());
766 case Type_v4i16: return T(Short8::getType());
767 case Type_v2i16: return T(Short8::getType());
768 case Type_v8i8: return T(Byte16::getType());
769 case Type_v4i8: return T(Byte16::getType());
770 case Type_v2f32: return T(Float4::getType());
771 case Type_LLVM: return reinterpret_cast<llvm::Type*>(t);
772 default: assert(false); return nullptr;
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400773 }
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400774 }
775
Nicolas Capensac230122016-09-20 14:30:06 -0400776 inline Type *T(llvm::Type *t)
777 {
778 return reinterpret_cast<Type*>(t);
779 }
780
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500781 Type *T(InternalType t)
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400782 {
783 return reinterpret_cast<Type*>(t);
784 }
785
Logan Chien191b3052018-08-31 16:57:15 +0800786 inline llvm::Value *V(Value *t)
787 {
788 return reinterpret_cast<llvm::Value*>(t);
789 }
790
Nicolas Capens19336542016-09-26 10:32:29 -0400791 inline Value *V(llvm::Value *t)
792 {
793 return reinterpret_cast<Value*>(t);
794 }
795
Nicolas Capensac230122016-09-20 14:30:06 -0400796 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
797 {
798 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
799 }
800
Logan Chien191b3052018-08-31 16:57:15 +0800801 inline llvm::BasicBlock *B(BasicBlock *t)
802 {
803 return reinterpret_cast<llvm::BasicBlock*>(t);
804 }
805
Nicolas Capensc8b67a42016-09-25 15:02:52 -0400806 inline BasicBlock *B(llvm::BasicBlock *t)
807 {
808 return reinterpret_cast<BasicBlock*>(t);
809 }
810
Nicolas Capens01a97962017-07-28 17:30:51 -0400811 static size_t typeSize(Type *type)
812 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500813 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -0400814 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500815 case Type_v2i32: return 8;
816 case Type_v4i16: return 8;
817 case Type_v2i16: return 4;
818 case Type_v8i8: return 8;
819 case Type_v4i8: return 4;
820 case Type_v2f32: return 8;
821 case Type_LLVM:
Nicolas Capens01a97962017-07-28 17:30:51 -0400822 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500823 llvm::Type *t = T(type);
Nicolas Capens01a97962017-07-28 17:30:51 -0400824
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500825 if(t->isPointerTy())
826 {
827 return sizeof(void*);
828 }
829
830 // At this point we should only have LLVM 'primitive' types.
831 unsigned int bits = t->getPrimitiveSizeInBits();
832 assert(bits != 0);
833
834 // TODO(capn): Booleans are 1 bit integers in LLVM's SSA type system,
835 // but are typically stored as one byte. The DataLayout structure should
836 // be used here and many other places if this assumption fails.
837 return (bits + 7) / 8;
838 }
839 break;
840 default:
841 assert(false);
842 return 0;
843 }
Nicolas Capens01a97962017-07-28 17:30:51 -0400844 }
845
Nicolas Capens69674fb2017-09-01 11:08:44 -0400846 static unsigned int elementCount(Type *type)
847 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500848 switch(asInternalType(type))
Nicolas Capens69674fb2017-09-01 11:08:44 -0400849 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -0500850 case Type_v2i32: return 2;
851 case Type_v4i16: return 4;
852 case Type_v2i16: return 2;
853 case Type_v8i8: return 8;
854 case Type_v4i8: return 4;
855 case Type_v2f32: return 2;
856 case Type_LLVM: return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
857 default: assert(false); return 0;
Nicolas Capens69674fb2017-09-01 11:08:44 -0400858 }
Nicolas Capens69674fb2017-09-01 11:08:44 -0400859 }
860
Nicolas Capens86509d92019-03-21 13:23:50 -0400861 static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
862 {
863 #if REACTOR_LLVM_VERSION < 7
864 return llvm::AtomicOrdering::NotAtomic;
865 #endif
866
867 if(!atomic)
868 {
869 return llvm::AtomicOrdering::NotAtomic;
870 }
871
872 switch(memoryOrder)
873 {
874 case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic; // https://llvm.org/docs/Atomics.html#monotonic
875 case std::memory_order_consume: return llvm::AtomicOrdering::Acquire; // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
876 case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
877 case std::memory_order_release: return llvm::AtomicOrdering::Release;
878 case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
879 case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
880 default: assert(false); return llvm::AtomicOrdering::AcquireRelease;
881 }
882 }
883
John Bauman89401822014-05-06 15:04:28 -0400884 Nucleus::Nucleus()
885 {
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400886 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400887
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400888 llvm::InitializeNativeTarget();
John Bauman89401822014-05-06 15:04:28 -0400889
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400890#if REACTOR_LLVM_VERSION >= 7
Logan Chien0eedc8c2018-08-21 09:34:28 +0800891 llvm::InitializeNativeTargetAsmPrinter();
892 llvm::InitializeNativeTargetAsmParser();
893#endif
894
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400895 if(!::context)
John Bauman89401822014-05-06 15:04:28 -0400896 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400897 ::context = new llvm::LLVMContext();
John Bauman89401822014-05-06 15:04:28 -0400898 }
899
John Bauman89401822014-05-06 15:04:28 -0400900 #if defined(__x86_64__)
Logan Chien52cde602018-09-03 19:37:57 +0800901 static const char arch[] = "x86-64";
Logan Chiene3191012018-08-24 22:01:50 +0800902 #elif defined(__i386__)
Logan Chien52cde602018-09-03 19:37:57 +0800903 static const char arch[] = "x86";
Logan Chiene3191012018-08-24 22:01:50 +0800904 #elif defined(__aarch64__)
905 static const char arch[] = "arm64";
906 #elif defined(__arm__)
907 static const char arch[] = "arm";
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200908 #elif defined(__mips__)
Gordana Cmiljanovic20622c02018-11-05 15:00:11 +0100909 #if defined(__mips64)
910 static const char arch[] = "mips64el";
911 #else
912 static const char arch[] = "mipsel";
913 #endif
Logan Chiene3191012018-08-24 22:01:50 +0800914 #else
915 #error "unknown architecture"
John Bauman89401822014-05-06 15:04:28 -0400916 #endif
917
Logan Chien52cde602018-09-03 19:37:57 +0800918 llvm::SmallVector<std::string, 1> mattrs;
Logan Chiene3191012018-08-24 22:01:50 +0800919#if defined(__i386__) || defined(__x86_64__)
Logan Chien0eedc8c2018-08-21 09:34:28 +0800920 mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
921 mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
922 mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
923 mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
924 mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
925 mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400926#if REACTOR_LLVM_VERSION < 7
Logan Chien0eedc8c2018-08-21 09:34:28 +0800927 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
928#else
929 mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
930#endif
Logan Chiene3191012018-08-24 22:01:50 +0800931#elif defined(__arm__)
932#if __ARM_ARCH >= 8
933 mattrs.push_back("+armv8-a");
934#else
935 // armv7-a requires compiler-rt routines; otherwise, compiled kernel
936 // might fail to link.
937#endif
938#endif
John Bauman89401822014-05-06 15:04:28 -0400939
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400940#if REACTOR_LLVM_VERSION < 7
Logan Chien52cde602018-09-03 19:37:57 +0800941 llvm::JITEmitDebugInfo = false;
942 llvm::UnsafeFPMath = true;
943 // llvm::NoInfsFPMath = true;
944 // llvm::NoNaNsFPMath = true;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800945#else
946 llvm::TargetOptions targetOpts;
Nicolas Capensa7643812018-09-13 14:20:06 -0400947 targetOpts.UnsafeFPMath = false;
Logan Chien0eedc8c2018-08-21 09:34:28 +0800948 // targetOpts.NoInfsFPMath = true;
949 // targetOpts.NoNaNsFPMath = true;
950#endif
Logan Chien52cde602018-09-03 19:37:57 +0800951
952 if(!::reactorJIT)
953 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -0400954#if REACTOR_LLVM_VERSION < 7
Logan Chien52cde602018-09-03 19:37:57 +0800955 ::reactorJIT = new LLVMReactorJIT(arch, mattrs);
Logan Chien0eedc8c2018-08-21 09:34:28 +0800956#else
957 ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
958#endif
Logan Chien52cde602018-09-03 19:37:57 +0800959 }
960
961 ::reactorJIT->startSession();
John Bauman89401822014-05-06 15:04:28 -0400962
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400963 if(!::builder)
John Bauman89401822014-05-06 15:04:28 -0400964 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400965 ::builder = new llvm::IRBuilder<>(*::context);
John Bauman89401822014-05-06 15:04:28 -0400966 }
967 }
968
969 Nucleus::~Nucleus()
970 {
Logan Chien52cde602018-09-03 19:37:57 +0800971 ::reactorJIT->endSession();
Nicolas Capensb7ea9842015-04-01 10:54:59 -0400972
Nicolas Capens3bbc5e12016-09-27 10:49:52 -0400973 ::codegenMutex.unlock();
John Bauman89401822014-05-06 15:04:28 -0400974 }
975
Chris Forbes878d4b02019-01-21 10:48:35 -0800976 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
John Bauman89401822014-05-06 15:04:28 -0400977 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -0400978 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
John Bauman19bac1e2014-05-06 15:23:49 -0400979 {
Nicolas Capensac230122016-09-20 14:30:06 -0400980 llvm::Type *type = ::function->getReturnType();
John Bauman19bac1e2014-05-06 15:23:49 -0400981
982 if(type->isVoidTy())
983 {
984 createRetVoid();
985 }
986 else
987 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -0400988 createRet(V(llvm::UndefValue::get(type)));
John Bauman19bac1e2014-05-06 15:23:49 -0400989 }
990 }
John Bauman89401822014-05-06 15:04:28 -0400991
992 if(false)
993 {
Nicolas Capens543629b2019-01-28 11:36:01 -0500994 #if REACTOR_LLVM_VERSION < 7
995 std::string error;
996 llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-unopt.txt").c_str(), error);
997 #else
998 std::error_code error;
999 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
1000 #endif
1001
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001002 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001003 }
1004
1005 if(runOptimizations)
1006 {
1007 optimize();
1008 }
1009
1010 if(false)
1011 {
Nicolas Capens543629b2019-01-28 11:36:01 -05001012 #if REACTOR_LLVM_VERSION < 7
1013 std::string error;
1014 llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-opt.txt").c_str(), error);
1015 #else
1016 std::error_code error;
1017 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
1018 #endif
1019
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001020 ::module->print(file, 0);
John Bauman89401822014-05-06 15:04:28 -04001021 }
1022
Logan Chien52cde602018-09-03 19:37:57 +08001023 LLVMRoutine *routine = ::reactorJIT->acquireRoutine(::function);
John Bauman89401822014-05-06 15:04:28 -04001024
John Bauman89401822014-05-06 15:04:28 -04001025 return routine;
1026 }
1027
1028 void Nucleus::optimize()
1029 {
Logan Chien52cde602018-09-03 19:37:57 +08001030 ::reactorJIT->optimize(::module);
John Bauman89401822014-05-06 15:04:28 -04001031 }
1032
John Bauman19bac1e2014-05-06 15:23:49 -04001033 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
John Bauman89401822014-05-06 15:04:28 -04001034 {
1035 // Need to allocate it in the entry block for mem2reg to work
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001036 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
John Bauman89401822014-05-06 15:04:28 -04001037
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001038 llvm::Instruction *declaration;
John Bauman89401822014-05-06 15:04:28 -04001039
1040 if(arraySize)
1041 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04001042#if REACTOR_LLVM_VERSION < 7
Logan Chien191b3052018-08-31 16:57:15 +08001043 declaration = new llvm::AllocaInst(T(type), V(Nucleus::createConstantInt(arraySize)));
Logan Chien0eedc8c2018-08-21 09:34:28 +08001044#else
1045 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
1046#endif
John Bauman89401822014-05-06 15:04:28 -04001047 }
1048 else
1049 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04001050#if REACTOR_LLVM_VERSION < 7
Logan Chien191b3052018-08-31 16:57:15 +08001051 declaration = new llvm::AllocaInst(T(type), (llvm::Value*)nullptr);
Logan Chien0eedc8c2018-08-21 09:34:28 +08001052#else
1053 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
1054#endif
John Bauman89401822014-05-06 15:04:28 -04001055 }
1056
1057 entryBlock.getInstList().push_front(declaration);
1058
Nicolas Capens19336542016-09-26 10:32:29 -04001059 return V(declaration);
John Bauman89401822014-05-06 15:04:28 -04001060 }
1061
1062 BasicBlock *Nucleus::createBasicBlock()
1063 {
Logan Chien191b3052018-08-31 16:57:15 +08001064 return B(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001065 }
1066
1067 BasicBlock *Nucleus::getInsertBlock()
1068 {
Nicolas Capensc8b67a42016-09-25 15:02:52 -04001069 return B(::builder->GetInsertBlock());
John Bauman89401822014-05-06 15:04:28 -04001070 }
1071
1072 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1073 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001074 // assert(::builder->GetInsertBlock()->back().isTerminator());
Nicolas Capens0192d152019-03-27 14:46:07 -04001075
1076 Variable::materializeAll();
1077
Logan Chien191b3052018-08-31 16:57:15 +08001078 ::builder->SetInsertPoint(B(basicBlock));
John Bauman89401822014-05-06 15:04:28 -04001079 }
1080
Nicolas Capensac230122016-09-20 14:30:06 -04001081 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
John Bauman89401822014-05-06 15:04:28 -04001082 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001083 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001084 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
1085 ::function->setCallingConv(llvm::CallingConv::C);
John Bauman89401822014-05-06 15:04:28 -04001086
Nicolas Capensf417d9d2018-10-10 10:49:30 -04001087 #if defined(_WIN32) && REACTOR_LLVM_VERSION >= 7
Nicolas Capens52551d12018-09-13 14:30:56 -04001088 // FIXME(capn):
1089 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
1090 // having a trap which allows the OS to grow the stack. For functions with a stack frame
1091 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
1092 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
1093 // the stack and ensure all pages have been committed. This is currently broken in LLVM
1094 // JIT, but we can prevent emitting the stack probe call:
1095 ::function->addFnAttr("stack-probe-size", "1048576");
1096 #endif
1097
Logan Chien191b3052018-08-31 16:57:15 +08001098 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
John Bauman89401822014-05-06 15:04:28 -04001099 }
1100
Nicolas Capens19336542016-09-26 10:32:29 -04001101 Value *Nucleus::getArgument(unsigned int index)
John Bauman89401822014-05-06 15:04:28 -04001102 {
Nicolas Capens5c1f5cc2016-09-23 16:45:13 -04001103 llvm::Function::arg_iterator args = ::function->arg_begin();
John Bauman89401822014-05-06 15:04:28 -04001104
1105 while(index)
1106 {
1107 args++;
1108 index--;
1109 }
1110
Nicolas Capens19336542016-09-26 10:32:29 -04001111 return V(&*args);
John Bauman89401822014-05-06 15:04:28 -04001112 }
1113
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001114 void Nucleus::createRetVoid()
John Bauman89401822014-05-06 15:04:28 -04001115 {
Nicolas Capens0192d152019-03-27 14:46:07 -04001116 // Code generated after this point is unreachable, so any variables
1117 // being read can safely return an undefined value. We have to avoid
1118 // materializing variables after the terminator ret instruction.
1119 Variable::killUnmaterialized();
1120
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001121 ::builder->CreateRetVoid();
John Bauman89401822014-05-06 15:04:28 -04001122 }
1123
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001124 void Nucleus::createRet(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001125 {
Nicolas Capens0192d152019-03-27 14:46:07 -04001126 // Code generated after this point is unreachable, so any variables
1127 // being read can safely return an undefined value. We have to avoid
1128 // materializing variables after the terminator ret instruction.
1129 Variable::killUnmaterialized();
1130
Logan Chien191b3052018-08-31 16:57:15 +08001131 ::builder->CreateRet(V(v));
John Bauman89401822014-05-06 15:04:28 -04001132 }
1133
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001134 void Nucleus::createBr(BasicBlock *dest)
John Bauman89401822014-05-06 15:04:28 -04001135 {
Nicolas Capens0192d152019-03-27 14:46:07 -04001136 Variable::materializeAll();
1137
Logan Chien191b3052018-08-31 16:57:15 +08001138 ::builder->CreateBr(B(dest));
John Bauman89401822014-05-06 15:04:28 -04001139 }
1140
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001141 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001142 {
Nicolas Capens0192d152019-03-27 14:46:07 -04001143 Variable::materializeAll();
1144
Logan Chien191b3052018-08-31 16:57:15 +08001145 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
John Bauman89401822014-05-06 15:04:28 -04001146 }
1147
1148 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1149 {
Logan Chien191b3052018-08-31 16:57:15 +08001150 return V(::builder->CreateAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001151 }
1152
1153 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1154 {
Logan Chien191b3052018-08-31 16:57:15 +08001155 return V(::builder->CreateSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001156 }
1157
1158 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1159 {
Logan Chien191b3052018-08-31 16:57:15 +08001160 return V(::builder->CreateMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001161 }
1162
1163 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1164 {
Logan Chien191b3052018-08-31 16:57:15 +08001165 return V(::builder->CreateUDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001166 }
1167
1168 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1169 {
Logan Chien191b3052018-08-31 16:57:15 +08001170 return V(::builder->CreateSDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001171 }
1172
1173 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1174 {
Logan Chien191b3052018-08-31 16:57:15 +08001175 return V(::builder->CreateFAdd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001176 }
1177
1178 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1179 {
Logan Chien191b3052018-08-31 16:57:15 +08001180 return V(::builder->CreateFSub(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001181 }
1182
1183 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1184 {
Logan Chien191b3052018-08-31 16:57:15 +08001185 return V(::builder->CreateFMul(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001186 }
1187
1188 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1189 {
Logan Chien191b3052018-08-31 16:57:15 +08001190 return V(::builder->CreateFDiv(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001191 }
1192
1193 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1194 {
Logan Chien191b3052018-08-31 16:57:15 +08001195 return V(::builder->CreateURem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001196 }
1197
1198 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1199 {
Logan Chien191b3052018-08-31 16:57:15 +08001200 return V(::builder->CreateSRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001201 }
1202
1203 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1204 {
Logan Chien191b3052018-08-31 16:57:15 +08001205 return V(::builder->CreateFRem(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001206 }
1207
1208 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1209 {
Logan Chien191b3052018-08-31 16:57:15 +08001210 return V(::builder->CreateShl(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001211 }
1212
1213 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1214 {
Logan Chien191b3052018-08-31 16:57:15 +08001215 return V(::builder->CreateLShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001216 }
1217
1218 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1219 {
Logan Chien191b3052018-08-31 16:57:15 +08001220 return V(::builder->CreateAShr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001221 }
1222
1223 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1224 {
Logan Chien191b3052018-08-31 16:57:15 +08001225 return V(::builder->CreateAnd(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001226 }
1227
1228 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1229 {
Logan Chien191b3052018-08-31 16:57:15 +08001230 return V(::builder->CreateOr(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001231 }
1232
1233 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1234 {
Logan Chien191b3052018-08-31 16:57:15 +08001235 return V(::builder->CreateXor(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001236 }
1237
Nicolas Capens19336542016-09-26 10:32:29 -04001238 Value *Nucleus::createNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001239 {
Logan Chien191b3052018-08-31 16:57:15 +08001240 return V(::builder->CreateNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001241 }
1242
Nicolas Capens19336542016-09-26 10:32:29 -04001243 Value *Nucleus::createFNeg(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001244 {
Logan Chien191b3052018-08-31 16:57:15 +08001245 return V(::builder->CreateFNeg(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001246 }
1247
Nicolas Capens19336542016-09-26 10:32:29 -04001248 Value *Nucleus::createNot(Value *v)
John Bauman89401822014-05-06 15:04:28 -04001249 {
Logan Chien191b3052018-08-31 16:57:15 +08001250 return V(::builder->CreateNot(V(v)));
John Bauman89401822014-05-06 15:04:28 -04001251 }
1252
Nicolas Capens86509d92019-03-21 13:23:50 -04001253 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001254 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001255 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001256 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001257 case Type_v2i32:
1258 case Type_v4i16:
1259 case Type_v8i8:
1260 case Type_v2f32:
1261 return createBitCast(
1262 createInsertElement(
1263 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
Nicolas Capens86509d92019-03-21 13:23:50 -04001264 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001265 0),
1266 type);
1267 case Type_v2i16:
1268 case Type_v4i8:
1269 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001270 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001271 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
Nicolas Capens86509d92019-03-21 13:23:50 -04001272 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001273 i = createZExt(i, Long::getType());
1274 Value *v = createInsertElement(u, i, 0);
1275 return createBitCast(v, type);
Nicolas Capens01a97962017-07-28 17:30:51 -04001276 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001277 // Fallthrough to non-emulated case.
1278 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001279 {
1280 assert(V(ptr)->getType()->getContainedType(0) == T(type));
1281 auto load = new llvm::LoadInst(V(ptr), "", isVolatile, alignment);
1282 load->setAtomic(atomicOrdering(atomic, memoryOrder));
1283
1284 return V(::builder->Insert(load));
1285 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001286 default:
1287 assert(false); return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001288 }
John Bauman89401822014-05-06 15:04:28 -04001289 }
1290
Nicolas Capens86509d92019-03-21 13:23:50 -04001291 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
John Bauman89401822014-05-06 15:04:28 -04001292 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001293 switch(asInternalType(type))
Nicolas Capens01a97962017-07-28 17:30:51 -04001294 {
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001295 case Type_v2i32:
1296 case Type_v4i16:
1297 case Type_v8i8:
1298 case Type_v2f32:
1299 createStore(
1300 createExtractElement(
1301 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1302 createBitCast(ptr, Pointer<Long>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001303 Long::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001304 return value;
1305 case Type_v2i16:
1306 case Type_v4i8:
1307 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
Nicolas Capens01a97962017-07-28 17:30:51 -04001308 {
Logan Chien191b3052018-08-31 16:57:15 +08001309 createStore(
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001310 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1311 createBitCast(ptr, Pointer<Int>::getType()),
Nicolas Capens86509d92019-03-21 13:23:50 -04001312 Int::getType(), isVolatile, alignment, atomic, memoryOrder);
Nicolas Capens01a97962017-07-28 17:30:51 -04001313 return value;
Nicolas Capens01a97962017-07-28 17:30:51 -04001314 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001315 // Fallthrough to non-emulated case.
1316 case Type_LLVM:
Nicolas Capens86509d92019-03-21 13:23:50 -04001317 {
1318 assert(V(ptr)->getType()->getContainedType(0) == T(type));
1319 auto store = ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
1320 store->setAtomic(atomicOrdering(atomic, memoryOrder));
1321
1322 return value;
1323 }
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001324 default:
1325 assert(false); return nullptr;
Nicolas Capens01a97962017-07-28 17:30:51 -04001326 }
John Bauman89401822014-05-06 15:04:28 -04001327 }
1328
Nicolas Capensd294def2017-01-26 17:44:37 -08001329 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
John Bauman89401822014-05-06 15:04:28 -04001330 {
Ben Claytonb1243732019-02-27 23:56:18 +00001331 assert(V(ptr)->getType()->getContainedType(0) == T(type));
1332
Nicolas Capens01a97962017-07-28 17:30:51 -04001333 if(sizeof(void*) == 8)
Nicolas Capensd294def2017-01-26 17:44:37 -08001334 {
Ben Claytonb1243732019-02-27 23:56:18 +00001335 // LLVM manual: "When indexing into an array, pointer or vector,
1336 // integers of any width are allowed, and they are not required to
1337 // be constant. These integers are treated as signed values where
1338 // relevant."
1339 //
1340 // Thus if we want indexes to be treated as unsigned we have to
1341 // zero-extend them ourselves.
1342 //
1343 // Note that this is not because we want to address anywhere near
1344 // 4 GB of data. Instead this is important for performance because
1345 // x86 supports automatic zero-extending of 32-bit registers to
1346 // 64-bit. Thus when indexing into an array using a uint32 is
1347 // actually faster than an int32.
1348 index = unsignedIndex ?
1349 createZExt(index, Long::getType()) :
1350 createSExt(index, Long::getType());
Nicolas Capens01a97962017-07-28 17:30:51 -04001351 }
Ben Claytonb1243732019-02-27 23:56:18 +00001352
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001353 // For non-emulated types we can rely on LLVM's GEP to calculate the
1354 // effective address correctly.
1355 if(asInternalType(type) == Type_LLVM)
Nicolas Capens01a97962017-07-28 17:30:51 -04001356 {
Ben Claytonb1243732019-02-27 23:56:18 +00001357 return V(::builder->CreateGEP(V(ptr), V(index)));
Nicolas Capensd294def2017-01-26 17:44:37 -08001358 }
1359
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001360 // For emulated types we have to multiply the index by the intended
1361 // type size ourselves to obain the byte offset.
Ben Claytonb1243732019-02-27 23:56:18 +00001362 index = (sizeof(void*) == 8) ?
1363 createMul(index, createConstantLong((int64_t)typeSize(type))) :
1364 createMul(index, createConstantInt((int)typeSize(type)));
1365
Nicolas Capens1a5c3b92019-03-08 17:26:43 -05001366 // Cast to a byte pointer, apply the byte offset, and cast back to the
1367 // original pointer type.
Logan Chien191b3052018-08-31 16:57:15 +08001368 return createBitCast(
1369 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1370 T(llvm::PointerType::get(T(type), 0)));
John Bauman89401822014-05-06 15:04:28 -04001371 }
1372
John Bauman19bac1e2014-05-06 15:23:49 -04001373 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
1374 {
Logan Chien191b3052018-08-31 16:57:15 +08001375 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), llvm::AtomicOrdering::SequentiallyConsistent));
John Bauman19bac1e2014-05-06 15:23:49 -04001376 }
1377
Nicolas Capens19336542016-09-26 10:32:29 -04001378 Value *Nucleus::createTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001379 {
Logan Chien191b3052018-08-31 16:57:15 +08001380 return V(::builder->CreateTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001381 }
1382
Nicolas Capens19336542016-09-26 10:32:29 -04001383 Value *Nucleus::createZExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001384 {
Logan Chien191b3052018-08-31 16:57:15 +08001385 return V(::builder->CreateZExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001386 }
1387
Nicolas Capens19336542016-09-26 10:32:29 -04001388 Value *Nucleus::createSExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001389 {
Logan Chien191b3052018-08-31 16:57:15 +08001390 return V(::builder->CreateSExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001391 }
1392
Nicolas Capens19336542016-09-26 10:32:29 -04001393 Value *Nucleus::createFPToSI(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001394 {
Logan Chien191b3052018-08-31 16:57:15 +08001395 return V(::builder->CreateFPToSI(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001396 }
1397
Nicolas Capens19336542016-09-26 10:32:29 -04001398 Value *Nucleus::createSIToFP(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001399 {
Logan Chien191b3052018-08-31 16:57:15 +08001400 return V(::builder->CreateSIToFP(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001401 }
1402
Nicolas Capens19336542016-09-26 10:32:29 -04001403 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001404 {
Logan Chien191b3052018-08-31 16:57:15 +08001405 return V(::builder->CreateFPTrunc(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001406 }
1407
Nicolas Capens19336542016-09-26 10:32:29 -04001408 Value *Nucleus::createFPExt(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001409 {
Logan Chien191b3052018-08-31 16:57:15 +08001410 return V(::builder->CreateFPExt(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001411 }
1412
Nicolas Capens19336542016-09-26 10:32:29 -04001413 Value *Nucleus::createBitCast(Value *v, Type *destType)
John Bauman89401822014-05-06 15:04:28 -04001414 {
Nicolas Capens01a97962017-07-28 17:30:51 -04001415 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1416 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1417 // reading back as the destination type.
Logan Chien191b3052018-08-31 16:57:15 +08001418 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001419 {
1420 Value *readAddress = allocateStackVariable(destType);
Logan Chien191b3052018-08-31 16:57:15 +08001421 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1422 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001423 return createLoad(readAddress, destType);
1424 }
Logan Chien191b3052018-08-31 16:57:15 +08001425 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
Nicolas Capens01a97962017-07-28 17:30:51 -04001426 {
Logan Chien191b3052018-08-31 16:57:15 +08001427 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1428 createStore(v, writeAddress, T(V(v)->getType()));
Nicolas Capens01a97962017-07-28 17:30:51 -04001429 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1430 return createLoad(readAddress, destType);
1431 }
1432
Logan Chien191b3052018-08-31 16:57:15 +08001433 return V(::builder->CreateBitCast(V(v), T(destType)));
John Bauman89401822014-05-06 15:04:28 -04001434 }
1435
John Bauman89401822014-05-06 15:04:28 -04001436 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1437 {
Logan Chien191b3052018-08-31 16:57:15 +08001438 return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001439 }
1440
1441 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1442 {
Logan Chien191b3052018-08-31 16:57:15 +08001443 return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001444 }
1445
1446 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1447 {
Logan Chien191b3052018-08-31 16:57:15 +08001448 return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001449 }
1450
1451 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1452 {
Logan Chien191b3052018-08-31 16:57:15 +08001453 return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001454 }
1455
1456 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1457 {
Logan Chien191b3052018-08-31 16:57:15 +08001458 return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001459 }
1460
1461 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1462 {
Logan Chien191b3052018-08-31 16:57:15 +08001463 return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001464 }
1465
1466 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1467 {
Logan Chien191b3052018-08-31 16:57:15 +08001468 return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001469 }
1470
1471 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1472 {
Logan Chien191b3052018-08-31 16:57:15 +08001473 return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001474 }
1475
1476 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1477 {
Logan Chien191b3052018-08-31 16:57:15 +08001478 return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001479 }
1480
1481 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1482 {
Logan Chien191b3052018-08-31 16:57:15 +08001483 return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001484 }
1485
1486 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1487 {
Logan Chien191b3052018-08-31 16:57:15 +08001488 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001489 }
1490
1491 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1492 {
Logan Chien191b3052018-08-31 16:57:15 +08001493 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001494 }
1495
1496 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1497 {
Logan Chien191b3052018-08-31 16:57:15 +08001498 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001499 }
1500
1501 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1502 {
Logan Chien191b3052018-08-31 16:57:15 +08001503 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001504 }
1505
1506 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1507 {
Logan Chien191b3052018-08-31 16:57:15 +08001508 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001509 }
1510
1511 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1512 {
Logan Chien191b3052018-08-31 16:57:15 +08001513 return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001514 }
1515
1516 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1517 {
Logan Chien191b3052018-08-31 16:57:15 +08001518 return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001519 }
1520
1521 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1522 {
Logan Chien191b3052018-08-31 16:57:15 +08001523 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001524 }
1525
1526 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1527 {
Logan Chien191b3052018-08-31 16:57:15 +08001528 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001529 }
1530
1531 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1532 {
Logan Chien191b3052018-08-31 16:57:15 +08001533 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001534 }
1535
1536 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1537 {
Logan Chien191b3052018-08-31 16:57:15 +08001538 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001539 }
1540
1541 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1542 {
Logan Chien191b3052018-08-31 16:57:15 +08001543 return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001544 }
1545
1546 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1547 {
Logan Chien191b3052018-08-31 16:57:15 +08001548 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001549 }
1550
1551 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1552 {
Ben Clayton71008d82019-03-05 17:17:59 +00001553 return V(::builder->CreateFCmpUNE(V(lhs), V(rhs)));
John Bauman89401822014-05-06 15:04:28 -04001554 }
1555
Nicolas Capense95d5342016-09-30 11:37:28 -04001556 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
John Bauman89401822014-05-06 15:04:28 -04001557 {
Logan Chien191b3052018-08-31 16:57:15 +08001558 assert(V(vector)->getType()->getContainedType(0) == T(type));
1559 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001560 }
1561
1562 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1563 {
Logan Chien191b3052018-08-31 16:57:15 +08001564 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
John Bauman89401822014-05-06 15:04:28 -04001565 }
1566
Logan Chien191b3052018-08-31 16:57:15 +08001567 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
John Bauman89401822014-05-06 15:04:28 -04001568 {
Logan Chien191b3052018-08-31 16:57:15 +08001569 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
Nicolas Capense89cd582016-09-30 14:23:47 -04001570 const int maxSize = 16;
1571 llvm::Constant *swizzle[maxSize];
1572 assert(size <= maxSize);
1573
1574 for(int i = 0; i < size; i++)
1575 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001576 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
Nicolas Capense89cd582016-09-30 14:23:47 -04001577 }
1578
1579 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1580
Logan Chien191b3052018-08-31 16:57:15 +08001581 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
John Bauman89401822014-05-06 15:04:28 -04001582 }
1583
Logan Chien191b3052018-08-31 16:57:15 +08001584 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
John Bauman89401822014-05-06 15:04:28 -04001585 {
Logan Chien191b3052018-08-31 16:57:15 +08001586 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
John Bauman89401822014-05-06 15:04:28 -04001587 }
1588
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001589 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
John Bauman89401822014-05-06 15:04:28 -04001590 {
Logan Chien191b3052018-08-31 16:57:15 +08001591 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
John Bauman89401822014-05-06 15:04:28 -04001592 }
1593
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001594 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
John Bauman89401822014-05-06 15:04:28 -04001595 {
Logan Chien191b3052018-08-31 16:57:15 +08001596 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1597 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
John Bauman89401822014-05-06 15:04:28 -04001598 }
1599
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001600 void Nucleus::createUnreachable()
John Bauman89401822014-05-06 15:04:28 -04001601 {
Nicolas Capens3d7c35f2016-09-28 10:36:57 -04001602 ::builder->CreateUnreachable();
John Bauman89401822014-05-06 15:04:28 -04001603 }
1604
Nicolas Capensac230122016-09-20 14:30:06 -04001605 Type *Nucleus::getPointerType(Type *ElementType)
John Bauman89401822014-05-06 15:04:28 -04001606 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001607 return T(llvm::PointerType::get(T(ElementType), 0));
John Bauman89401822014-05-06 15:04:28 -04001608 }
1609
Nicolas Capens13ac2322016-10-13 14:52:12 -04001610 Value *Nucleus::createNullValue(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001611 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001612 return V(llvm::Constant::getNullValue(T(Ty)));
John Bauman89401822014-05-06 15:04:28 -04001613 }
1614
Nicolas Capens13ac2322016-10-13 14:52:12 -04001615 Value *Nucleus::createConstantLong(int64_t i)
John Bauman89401822014-05-06 15:04:28 -04001616 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001617 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001618 }
1619
Nicolas Capens13ac2322016-10-13 14:52:12 -04001620 Value *Nucleus::createConstantInt(int i)
John Bauman89401822014-05-06 15:04:28 -04001621 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001622 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001623 }
1624
Nicolas Capens13ac2322016-10-13 14:52:12 -04001625 Value *Nucleus::createConstantInt(unsigned int i)
John Bauman89401822014-05-06 15:04:28 -04001626 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001627 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001628 }
1629
Nicolas Capens13ac2322016-10-13 14:52:12 -04001630 Value *Nucleus::createConstantBool(bool b)
John Bauman89401822014-05-06 15:04:28 -04001631 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001632 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
John Bauman89401822014-05-06 15:04:28 -04001633 }
1634
Nicolas Capens13ac2322016-10-13 14:52:12 -04001635 Value *Nucleus::createConstantByte(signed char i)
John Bauman89401822014-05-06 15:04:28 -04001636 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001637 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001638 }
1639
Nicolas Capens13ac2322016-10-13 14:52:12 -04001640 Value *Nucleus::createConstantByte(unsigned char i)
John Bauman89401822014-05-06 15:04:28 -04001641 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001642 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001643 }
1644
Nicolas Capens13ac2322016-10-13 14:52:12 -04001645 Value *Nucleus::createConstantShort(short i)
John Bauman89401822014-05-06 15:04:28 -04001646 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001647 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
John Bauman89401822014-05-06 15:04:28 -04001648 }
1649
Nicolas Capens13ac2322016-10-13 14:52:12 -04001650 Value *Nucleus::createConstantShort(unsigned short i)
John Bauman89401822014-05-06 15:04:28 -04001651 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001652 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
John Bauman89401822014-05-06 15:04:28 -04001653 }
1654
Nicolas Capens13ac2322016-10-13 14:52:12 -04001655 Value *Nucleus::createConstantFloat(float x)
John Bauman89401822014-05-06 15:04:28 -04001656 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001657 return V(llvm::ConstantFP::get(T(Float::getType()), x));
John Bauman89401822014-05-06 15:04:28 -04001658 }
1659
Nicolas Capens13ac2322016-10-13 14:52:12 -04001660 Value *Nucleus::createNullPointer(Type *Ty)
John Bauman89401822014-05-06 15:04:28 -04001661 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001662 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
John Bauman89401822014-05-06 15:04:28 -04001663 }
1664
Nicolas Capens13ac2322016-10-13 14:52:12 -04001665 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
John Bauman89401822014-05-06 15:04:28 -04001666 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001667 assert(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001668 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1669 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
1670 assert(numElements <= 16 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001671 llvm::Constant *constantVector[16];
1672
Nicolas Capens69674fb2017-09-01 11:08:44 -04001673 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001674 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001675 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001676 }
1677
Nicolas Capens69674fb2017-09-01 11:08:44 -04001678 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
Nicolas Capens13ac2322016-10-13 14:52:12 -04001679 }
1680
1681 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1682 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001683 assert(llvm::isa<llvm::VectorType>(T(type)));
Nicolas Capens69674fb2017-09-01 11:08:44 -04001684 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1685 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
1686 assert(numElements <= 8 && numConstants <= numElements);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001687 llvm::Constant *constantVector[8];
1688
Nicolas Capens69674fb2017-09-01 11:08:44 -04001689 for(int i = 0; i < numElements; i++)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001690 {
Nicolas Capens69674fb2017-09-01 11:08:44 -04001691 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001692 }
1693
Nicolas Capens69674fb2017-09-01 11:08:44 -04001694 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
John Bauman89401822014-05-06 15:04:28 -04001695 }
1696
John Bauman19bac1e2014-05-06 15:23:49 -04001697 Type *Void::getType()
John Bauman89401822014-05-06 15:04:28 -04001698 {
Nicolas Capensac230122016-09-20 14:30:06 -04001699 return T(llvm::Type::getVoidTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04001700 }
1701
John Bauman19bac1e2014-05-06 15:23:49 -04001702 Type *Bool::getType()
John Bauman89401822014-05-06 15:04:28 -04001703 {
Nicolas Capensac230122016-09-20 14:30:06 -04001704 return T(llvm::Type::getInt1Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001705 }
1706
John Bauman19bac1e2014-05-06 15:23:49 -04001707 Type *Byte::getType()
John Bauman89401822014-05-06 15:04:28 -04001708 {
Nicolas Capensac230122016-09-20 14:30:06 -04001709 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001710 }
1711
John Bauman19bac1e2014-05-06 15:23:49 -04001712 Type *SByte::getType()
John Bauman89401822014-05-06 15:04:28 -04001713 {
Nicolas Capensac230122016-09-20 14:30:06 -04001714 return T(llvm::Type::getInt8Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001715 }
1716
John Bauman19bac1e2014-05-06 15:23:49 -04001717 Type *Short::getType()
John Bauman89401822014-05-06 15:04:28 -04001718 {
Nicolas Capensac230122016-09-20 14:30:06 -04001719 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001720 }
1721
John Bauman19bac1e2014-05-06 15:23:49 -04001722 Type *UShort::getType()
John Bauman89401822014-05-06 15:04:28 -04001723 {
Nicolas Capensac230122016-09-20 14:30:06 -04001724 return T(llvm::Type::getInt16Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04001725 }
1726
John Bauman19bac1e2014-05-06 15:23:49 -04001727 Type *Byte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001728 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001729 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001730 }
1731
John Bauman19bac1e2014-05-06 15:23:49 -04001732 Type *SByte4::getType()
John Bauman89401822014-05-06 15:04:28 -04001733 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001734 return T(Type_v4i8);
John Bauman89401822014-05-06 15:04:28 -04001735 }
1736
John Bauman19bac1e2014-05-06 15:23:49 -04001737 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001738 {
Logan Chiene3191012018-08-24 22:01:50 +08001739#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001740 return x86::paddusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001741#else
1742 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
1743#endif
John Bauman89401822014-05-06 15:04:28 -04001744 }
John Bauman66b8ab22014-05-06 15:57:45 -04001745
John Bauman19bac1e2014-05-06 15:23:49 -04001746 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001747 {
Logan Chiene3191012018-08-24 22:01:50 +08001748#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001749 return x86::psubusb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001750#else
1751 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
1752#endif
John Bauman89401822014-05-06 15:04:28 -04001753 }
1754
John Bauman19bac1e2014-05-06 15:23:49 -04001755 RValue<Int> SignMask(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04001756 {
Logan Chiene3191012018-08-24 22:01:50 +08001757#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001758 return x86::pmovmskb(x);
Logan Chiene3191012018-08-24 22:01:50 +08001759#else
1760 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
1761#endif
John Bauman89401822014-05-06 15:04:28 -04001762 }
1763
John Bauman19bac1e2014-05-06 15:23:49 -04001764// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001765// {
Logan Chiene3191012018-08-24 22:01:50 +08001766//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001767// return x86::pcmpgtb(x, y); // FIXME: Signedness
Logan Chiene3191012018-08-24 22:01:50 +08001768//#else
1769// return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
1770//#endif
John Bauman89401822014-05-06 15:04:28 -04001771// }
John Bauman66b8ab22014-05-06 15:57:45 -04001772
John Bauman19bac1e2014-05-06 15:23:49 -04001773 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04001774 {
Logan Chiene3191012018-08-24 22:01:50 +08001775#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001776 return x86::pcmpeqb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001777#else
1778 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
1779#endif
John Bauman89401822014-05-06 15:04:28 -04001780 }
1781
John Bauman19bac1e2014-05-06 15:23:49 -04001782 Type *Byte8::getType()
John Bauman89401822014-05-06 15:04:28 -04001783 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001784 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04001785 }
1786
John Bauman19bac1e2014-05-06 15:23:49 -04001787 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001788 {
Logan Chiene3191012018-08-24 22:01:50 +08001789#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001790 return x86::paddsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001791#else
1792 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
1793#endif
John Bauman89401822014-05-06 15:04:28 -04001794 }
John Bauman66b8ab22014-05-06 15:57:45 -04001795
John Bauman19bac1e2014-05-06 15:23:49 -04001796 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001797 {
Logan Chiene3191012018-08-24 22:01:50 +08001798#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001799 return x86::psubsb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001800#else
1801 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
1802#endif
John Bauman89401822014-05-06 15:04:28 -04001803 }
1804
John Bauman19bac1e2014-05-06 15:23:49 -04001805 RValue<Int> SignMask(RValue<SByte8> x)
John Bauman89401822014-05-06 15:04:28 -04001806 {
Logan Chiene3191012018-08-24 22:01:50 +08001807#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001808 return x86::pmovmskb(As<Byte8>(x));
Logan Chiene3191012018-08-24 22:01:50 +08001809#else
1810 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
1811#endif
John Bauman89401822014-05-06 15:04:28 -04001812 }
1813
John Bauman19bac1e2014-05-06 15:23:49 -04001814 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001815 {
Logan Chiene3191012018-08-24 22:01:50 +08001816#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001817 return x86::pcmpgtb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001818#else
1819 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
1820#endif
John Bauman89401822014-05-06 15:04:28 -04001821 }
John Bauman66b8ab22014-05-06 15:57:45 -04001822
John Bauman19bac1e2014-05-06 15:23:49 -04001823 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04001824 {
Logan Chiene3191012018-08-24 22:01:50 +08001825#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001826 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
Logan Chiene3191012018-08-24 22:01:50 +08001827#else
1828 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
1829#endif
John Bauman89401822014-05-06 15:04:28 -04001830 }
1831
John Bauman19bac1e2014-05-06 15:23:49 -04001832 Type *SByte8::getType()
John Bauman89401822014-05-06 15:04:28 -04001833 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001834 return T(Type_v8i8);
John Bauman89401822014-05-06 15:04:28 -04001835 }
1836
John Bauman19bac1e2014-05-06 15:23:49 -04001837 Type *Byte16::getType()
John Bauman89401822014-05-06 15:04:28 -04001838 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001839 return T(llvm::VectorType::get(T(Byte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04001840 }
1841
John Bauman19bac1e2014-05-06 15:23:49 -04001842 Type *SByte16::getType()
John Bauman89401822014-05-06 15:04:28 -04001843 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001844 return T(llvm::VectorType::get(T(SByte::getType()), 16));
John Bauman89401822014-05-06 15:04:28 -04001845 }
1846
Nicolas Capens16b5f152016-10-13 13:39:01 -04001847 Type *Short2::getType()
1848 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001849 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04001850 }
1851
Nicolas Capens16b5f152016-10-13 13:39:01 -04001852 Type *UShort2::getType()
1853 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001854 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04001855 }
1856
John Bauman19bac1e2014-05-06 15:23:49 -04001857 Short4::Short4(RValue<Int4> cast)
John Bauman89401822014-05-06 15:04:28 -04001858 {
Nicolas Capens01a97962017-07-28 17:30:51 -04001859 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
John Bauman89401822014-05-06 15:04:28 -04001860 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
1861
Nicolas Capens01a97962017-07-28 17:30:51 -04001862 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
1863 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
John Bauman89401822014-05-06 15:04:28 -04001864
John Bauman66b8ab22014-05-06 15:57:45 -04001865 storeValue(short4);
John Bauman89401822014-05-06 15:04:28 -04001866 }
1867
John Bauman19bac1e2014-05-06 15:23:49 -04001868// Short4::Short4(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04001869// {
1870// }
1871
John Bauman19bac1e2014-05-06 15:23:49 -04001872 Short4::Short4(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04001873 {
John Bauman89401822014-05-06 15:04:28 -04001874 Int4 v4i32 = Int4(cast);
Logan Chiene3191012018-08-24 22:01:50 +08001875#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001876 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
Logan Chiene3191012018-08-24 22:01:50 +08001877#else
1878 Value *v = v4i32.loadValue();
1879 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
1880#endif
John Bauman66b8ab22014-05-06 15:57:45 -04001881
1882 storeValue(As<Short4>(Int2(v4i32)).value);
John Bauman89401822014-05-06 15:04:28 -04001883 }
1884
John Bauman19bac1e2014-05-06 15:23:49 -04001885 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04001886 {
Logan Chiene3191012018-08-24 22:01:50 +08001887#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001888 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
1889
1890 return x86::psllw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08001891#else
1892 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
1893#endif
John Bauman89401822014-05-06 15:04:28 -04001894 }
1895
John Bauman19bac1e2014-05-06 15:23:49 -04001896 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04001897 {
Logan Chiene3191012018-08-24 22:01:50 +08001898#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001899 return x86::psraw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08001900#else
1901 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
1902#endif
John Bauman89401822014-05-06 15:04:28 -04001903 }
1904
John Bauman19bac1e2014-05-06 15:23:49 -04001905 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001906 {
Logan Chiene3191012018-08-24 22:01:50 +08001907#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001908 return x86::pmaxsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001909#else
1910 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
1911#endif
John Bauman89401822014-05-06 15:04:28 -04001912 }
1913
John Bauman19bac1e2014-05-06 15:23:49 -04001914 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001915 {
Logan Chiene3191012018-08-24 22:01:50 +08001916#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001917 return x86::pminsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001918#else
1919 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
1920#endif
John Bauman89401822014-05-06 15:04:28 -04001921 }
1922
John Bauman19bac1e2014-05-06 15:23:49 -04001923 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001924 {
Logan Chiene3191012018-08-24 22:01:50 +08001925#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001926 return x86::paddsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001927#else
1928 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
1929#endif
John Bauman89401822014-05-06 15:04:28 -04001930 }
1931
John Bauman19bac1e2014-05-06 15:23:49 -04001932 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001933 {
Logan Chiene3191012018-08-24 22:01:50 +08001934#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001935 return x86::psubsw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001936#else
1937 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
1938#endif
John Bauman89401822014-05-06 15:04:28 -04001939 }
1940
John Bauman19bac1e2014-05-06 15:23:49 -04001941 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001942 {
Logan Chiene3191012018-08-24 22:01:50 +08001943#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001944 return x86::pmulhw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001945#else
1946 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
1947#endif
John Bauman89401822014-05-06 15:04:28 -04001948 }
1949
John Bauman19bac1e2014-05-06 15:23:49 -04001950 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001951 {
Logan Chiene3191012018-08-24 22:01:50 +08001952#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001953 return x86::pmaddwd(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001954#else
1955 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
1956#endif
John Bauman89401822014-05-06 15:04:28 -04001957 }
1958
Nicolas Capens33438a62017-09-27 11:47:35 -04001959 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001960 {
Logan Chiene3191012018-08-24 22:01:50 +08001961#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04001962 auto result = x86::packsswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001963#else
1964 auto result = V(lowerPack(V(x.value), V(y.value), true));
1965#endif
Nicolas Capens01a97962017-07-28 17:30:51 -04001966 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
John Bauman89401822014-05-06 15:04:28 -04001967 }
1968
Nicolas Capens33438a62017-09-27 11:47:35 -04001969 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
1970 {
Logan Chiene3191012018-08-24 22:01:50 +08001971#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04001972 auto result = x86::packuswb(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001973#else
1974 auto result = V(lowerPack(V(x.value), V(y.value), false));
1975#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04001976 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
1977 }
1978
John Bauman19bac1e2014-05-06 15:23:49 -04001979 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001980 {
Logan Chiene3191012018-08-24 22:01:50 +08001981#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001982 return x86::pcmpgtw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001983#else
1984 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
1985#endif
John Bauman89401822014-05-06 15:04:28 -04001986 }
1987
John Bauman19bac1e2014-05-06 15:23:49 -04001988 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04001989 {
Logan Chiene3191012018-08-24 22:01:50 +08001990#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04001991 return x86::pcmpeqw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08001992#else
1993 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
1994#endif
John Bauman89401822014-05-06 15:04:28 -04001995 }
1996
John Bauman19bac1e2014-05-06 15:23:49 -04001997 Type *Short4::getType()
John Bauman89401822014-05-06 15:04:28 -04001998 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04001999 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002000 }
2001
John Bauman19bac1e2014-05-06 15:23:49 -04002002 UShort4::UShort4(RValue<Float4> cast, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04002003 {
John Bauman89401822014-05-06 15:04:28 -04002004 if(saturate)
2005 {
Logan Chiena8385ed2018-09-26 19:22:54 +08002006#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002007 if(CPUID::supportsSSE4_1())
2008 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002009 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
Nicolas Capens33438a62017-09-27 11:47:35 -04002010 *this = As<Short4>(PackUnsigned(int4, int4));
John Bauman89401822014-05-06 15:04:28 -04002011 }
2012 else
Logan Chiena8385ed2018-09-26 19:22:54 +08002013#endif
John Bauman89401822014-05-06 15:04:28 -04002014 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002015 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
John Bauman89401822014-05-06 15:04:28 -04002016 }
2017 }
2018 else
2019 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002020 *this = Short4(Int4(cast));
John Bauman89401822014-05-06 15:04:28 -04002021 }
2022 }
2023
John Bauman19bac1e2014-05-06 15:23:49 -04002024 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002025 {
Logan Chiene3191012018-08-24 22:01:50 +08002026#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002027 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2028
2029 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002030#else
2031 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
2032#endif
John Bauman89401822014-05-06 15:04:28 -04002033 }
2034
John Bauman19bac1e2014-05-06 15:23:49 -04002035 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002036 {
Logan Chiene3191012018-08-24 22:01:50 +08002037#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002038 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
2039
2040 return x86::psrlw(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002041#else
2042 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2043#endif
John Bauman89401822014-05-06 15:04:28 -04002044 }
2045
John Bauman19bac1e2014-05-06 15:23:49 -04002046 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002047 {
John Bauman66b8ab22014-05-06 15:57:45 -04002048 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002049 }
2050
John Bauman19bac1e2014-05-06 15:23:49 -04002051 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002052 {
John Bauman66b8ab22014-05-06 15:57:45 -04002053 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
John Bauman89401822014-05-06 15:04:28 -04002054 }
2055
John Bauman19bac1e2014-05-06 15:23:49 -04002056 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002057 {
Logan Chiene3191012018-08-24 22:01:50 +08002058#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002059 return x86::paddusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002060#else
2061 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2062#endif
John Bauman89401822014-05-06 15:04:28 -04002063 }
2064
John Bauman19bac1e2014-05-06 15:23:49 -04002065 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002066 {
Logan Chiene3191012018-08-24 22:01:50 +08002067#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002068 return x86::psubusw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002069#else
2070 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2071#endif
John Bauman89401822014-05-06 15:04:28 -04002072 }
2073
John Bauman19bac1e2014-05-06 15:23:49 -04002074 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002075 {
Logan Chiene3191012018-08-24 22:01:50 +08002076#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002077 return x86::pmulhuw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002078#else
2079 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2080#endif
John Bauman89401822014-05-06 15:04:28 -04002081 }
2082
John Bauman19bac1e2014-05-06 15:23:49 -04002083 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04002084 {
Logan Chiene3191012018-08-24 22:01:50 +08002085#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002086 return x86::pavgw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002087#else
2088 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
2089#endif
John Bauman89401822014-05-06 15:04:28 -04002090 }
2091
John Bauman19bac1e2014-05-06 15:23:49 -04002092 Type *UShort4::getType()
John Bauman89401822014-05-06 15:04:28 -04002093 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002094 return T(Type_v4i16);
John Bauman89401822014-05-06 15:04:28 -04002095 }
2096
John Bauman19bac1e2014-05-06 15:23:49 -04002097 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002098 {
Logan Chiene3191012018-08-24 22:01:50 +08002099#if defined(__i386__) || defined(__x86_64__)
2100 return x86::psllw(lhs, rhs);
2101#else
2102 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
2103#endif
John Bauman89401822014-05-06 15:04:28 -04002104 }
2105
John Bauman19bac1e2014-05-06 15:23:49 -04002106 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002107 {
Logan Chiene3191012018-08-24 22:01:50 +08002108#if defined(__i386__) || defined(__x86_64__)
2109 return x86::psraw(lhs, rhs);
2110#else
2111 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
2112#endif
John Bauman89401822014-05-06 15:04:28 -04002113 }
2114
John Bauman19bac1e2014-05-06 15:23:49 -04002115 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002116 {
Logan Chiene3191012018-08-24 22:01:50 +08002117#if defined(__i386__) || defined(__x86_64__)
2118 return x86::pmaddwd(x, y);
2119#else
2120 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
2121#endif
John Bauman89401822014-05-06 15:04:28 -04002122 }
2123
John Bauman19bac1e2014-05-06 15:23:49 -04002124 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04002125 {
Logan Chiene3191012018-08-24 22:01:50 +08002126#if defined(__i386__) || defined(__x86_64__)
2127 return x86::pmulhw(x, y);
2128#else
2129 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2130#endif
John Bauman89401822014-05-06 15:04:28 -04002131 }
2132
John Bauman19bac1e2014-05-06 15:23:49 -04002133 Type *Short8::getType()
John Bauman89401822014-05-06 15:04:28 -04002134 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002135 return T(llvm::VectorType::get(T(Short::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002136 }
2137
John Bauman19bac1e2014-05-06 15:23:49 -04002138 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002139 {
Logan Chiene3191012018-08-24 22:01:50 +08002140#if defined(__i386__) || defined(__x86_64__)
2141 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
2142#else
2143 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
2144#endif
John Bauman89401822014-05-06 15:04:28 -04002145 }
2146
John Bauman19bac1e2014-05-06 15:23:49 -04002147 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002148 {
Logan Chiene3191012018-08-24 22:01:50 +08002149#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002150 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
Logan Chiene3191012018-08-24 22:01:50 +08002151#else
2152 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
2153#endif
John Bauman89401822014-05-06 15:04:28 -04002154 }
2155
John Bauman19bac1e2014-05-06 15:23:49 -04002156 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
John Bauman89401822014-05-06 15:04:28 -04002157 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002158 int pshufb[16] =
2159 {
2160 select0 + 0,
2161 select0 + 1,
2162 select1 + 0,
2163 select1 + 1,
2164 select2 + 0,
2165 select2 + 1,
2166 select3 + 0,
2167 select3 + 1,
2168 select4 + 0,
2169 select4 + 1,
2170 select5 + 0,
2171 select5 + 1,
2172 select6 + 0,
2173 select6 + 1,
2174 select7 + 0,
2175 select7 + 1,
2176 };
John Bauman89401822014-05-06 15:04:28 -04002177
2178 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
Nicolas Capense89cd582016-09-30 14:23:47 -04002179 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
John Bauman89401822014-05-06 15:04:28 -04002180 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
2181
2182 return RValue<UShort8>(short8);
2183 }
2184
John Bauman19bac1e2014-05-06 15:23:49 -04002185 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04002186 {
Logan Chiene3191012018-08-24 22:01:50 +08002187#if defined(__i386__) || defined(__x86_64__)
2188 return x86::pmulhuw(x, y);
2189#else
2190 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2191#endif
John Bauman89401822014-05-06 15:04:28 -04002192 }
2193
John Bauman19bac1e2014-05-06 15:23:49 -04002194 Type *UShort8::getType()
John Bauman89401822014-05-06 15:04:28 -04002195 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002196 return T(llvm::VectorType::get(T(UShort::getType()), 8));
John Bauman89401822014-05-06 15:04:28 -04002197 }
2198
Nicolas Capens96d4e092016-11-18 14:22:38 -05002199 RValue<Int> operator++(Int &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002200 {
2201 RValue<Int> res = val;
2202
Logan Chien191b3052018-08-31 16:57:15 +08002203 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002204 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002205
2206 return res;
2207 }
2208
Nicolas Capens96d4e092016-11-18 14:22:38 -05002209 const Int &operator++(Int &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002210 {
Logan Chien191b3052018-08-31 16:57:15 +08002211 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002212 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002213
2214 return val;
2215 }
2216
Nicolas Capens96d4e092016-11-18 14:22:38 -05002217 RValue<Int> operator--(Int &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002218 {
2219 RValue<Int> res = val;
2220
Logan Chien191b3052018-08-31 16:57:15 +08002221 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002222 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002223
2224 return res;
2225 }
2226
Nicolas Capens96d4e092016-11-18 14:22:38 -05002227 const Int &operator--(Int &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002228 {
Logan Chien191b3052018-08-31 16:57:15 +08002229 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002230 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002231
2232 return val;
2233 }
2234
John Bauman19bac1e2014-05-06 15:23:49 -04002235 RValue<Int> RoundInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002236 {
Logan Chiene3191012018-08-24 22:01:50 +08002237#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002238 return x86::cvtss2si(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002239#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002240 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002241#endif
John Bauman89401822014-05-06 15:04:28 -04002242 }
2243
John Bauman19bac1e2014-05-06 15:23:49 -04002244 Type *Int::getType()
John Bauman89401822014-05-06 15:04:28 -04002245 {
Nicolas Capensac230122016-09-20 14:30:06 -04002246 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002247 }
2248
John Bauman19bac1e2014-05-06 15:23:49 -04002249 Type *Long::getType()
John Bauman89401822014-05-06 15:04:28 -04002250 {
Nicolas Capensac230122016-09-20 14:30:06 -04002251 return T(llvm::Type::getInt64Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002252 }
2253
John Bauman19bac1e2014-05-06 15:23:49 -04002254 UInt::UInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002255 {
Alexis Hetu764d1422016-09-28 08:44:22 -04002256 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2257 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
John Bauman89401822014-05-06 15:04:28 -04002258
Alexis Hetu764d1422016-09-28 08:44:22 -04002259 // Smallest positive value representable in UInt, but not in Int
2260 const unsigned int ustart = 0x80000000u;
2261 const float ustartf = float(ustart);
2262
2263 // If the value is negative, store 0, otherwise store the result of the conversion
2264 storeValue((~(As<Int>(cast) >> 31) &
2265 // Check if the value can be represented as an Int
2266 IfThenElse(cast >= ustartf,
2267 // If the value is too large, subtract ustart and re-add it after conversion.
2268 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2269 // Otherwise, just convert normally
2270 Int(cast))).value);
John Bauman89401822014-05-06 15:04:28 -04002271 }
2272
Nicolas Capens96d4e092016-11-18 14:22:38 -05002273 RValue<UInt> operator++(UInt &val, int) // Post-increment
John Bauman89401822014-05-06 15:04:28 -04002274 {
2275 RValue<UInt> res = val;
2276
Logan Chien191b3052018-08-31 16:57:15 +08002277 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002278 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002279
2280 return res;
2281 }
2282
Nicolas Capens96d4e092016-11-18 14:22:38 -05002283 const UInt &operator++(UInt &val) // Pre-increment
John Bauman89401822014-05-06 15:04:28 -04002284 {
Logan Chien191b3052018-08-31 16:57:15 +08002285 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002286 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002287
2288 return val;
2289 }
2290
Nicolas Capens96d4e092016-11-18 14:22:38 -05002291 RValue<UInt> operator--(UInt &val, int) // Post-decrement
John Bauman89401822014-05-06 15:04:28 -04002292 {
2293 RValue<UInt> res = val;
2294
Logan Chien191b3052018-08-31 16:57:15 +08002295 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002296 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002297
2298 return res;
2299 }
2300
Nicolas Capens96d4e092016-11-18 14:22:38 -05002301 const UInt &operator--(UInt &val) // Pre-decrement
John Bauman89401822014-05-06 15:04:28 -04002302 {
Logan Chien191b3052018-08-31 16:57:15 +08002303 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
John Bauman66b8ab22014-05-06 15:57:45 -04002304 val.storeValue(inc);
John Bauman89401822014-05-06 15:04:28 -04002305
2306 return val;
2307 }
2308
John Bauman19bac1e2014-05-06 15:23:49 -04002309// RValue<UInt> RoundUInt(RValue<Float> cast)
John Bauman89401822014-05-06 15:04:28 -04002310// {
Logan Chiene3191012018-08-24 22:01:50 +08002311//#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002312// return x86::cvtss2si(val); // FIXME: Unsigned
Logan Chiene3191012018-08-24 22:01:50 +08002313//#else
2314// return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
2315//#endif
John Bauman89401822014-05-06 15:04:28 -04002316// }
2317
John Bauman19bac1e2014-05-06 15:23:49 -04002318 Type *UInt::getType()
John Bauman89401822014-05-06 15:04:28 -04002319 {
Nicolas Capensac230122016-09-20 14:30:06 -04002320 return T(llvm::Type::getInt32Ty(*::context));
John Bauman89401822014-05-06 15:04:28 -04002321 }
2322
John Bauman19bac1e2014-05-06 15:23:49 -04002323// Int2::Int2(RValue<Int> cast)
2324// {
John Bauman19bac1e2014-05-06 15:23:49 -04002325// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2326// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
John Bauman66b8ab22014-05-06 15:57:45 -04002327//
Nicolas Capense89cd582016-09-30 14:23:47 -04002328// int shuffle[2] = {0, 0};
2329// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
John Bauman19bac1e2014-05-06 15:23:49 -04002330//
John Bauman66b8ab22014-05-06 15:57:45 -04002331// storeValue(replicate);
John Bauman19bac1e2014-05-06 15:23:49 -04002332// }
John Bauman89401822014-05-06 15:04:28 -04002333
John Bauman19bac1e2014-05-06 15:23:49 -04002334 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002335 {
Logan Chiene3191012018-08-24 22:01:50 +08002336#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002337 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
2338
2339 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002340#else
2341 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
2342#endif
John Bauman89401822014-05-06 15:04:28 -04002343 }
2344
John Bauman19bac1e2014-05-06 15:23:49 -04002345 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002346 {
Logan Chiene3191012018-08-24 22:01:50 +08002347#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002348 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
2349
2350 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002351#else
2352 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
2353#endif
John Bauman89401822014-05-06 15:04:28 -04002354 }
2355
John Bauman19bac1e2014-05-06 15:23:49 -04002356 Type *Int2::getType()
John Bauman89401822014-05-06 15:04:28 -04002357 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002358 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002359 }
2360
John Bauman19bac1e2014-05-06 15:23:49 -04002361 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002362 {
Logan Chiene3191012018-08-24 22:01:50 +08002363#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002364 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
2365
2366 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002367#else
2368 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
2369#endif
John Bauman89401822014-05-06 15:04:28 -04002370 }
2371
John Bauman19bac1e2014-05-06 15:23:49 -04002372 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002373 {
Logan Chiene3191012018-08-24 22:01:50 +08002374#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002375 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
2376
2377 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002378#else
2379 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
2380#endif
John Bauman89401822014-05-06 15:04:28 -04002381 }
2382
John Bauman19bac1e2014-05-06 15:23:49 -04002383 Type *UInt2::getType()
John Bauman89401822014-05-06 15:04:28 -04002384 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002385 return T(Type_v2i32);
John Bauman89401822014-05-06 15:04:28 -04002386 }
2387
Nicolas Capenscb986762017-01-20 11:34:37 -05002388 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002389 {
Logan Chiene3191012018-08-24 22:01:50 +08002390#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002391 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002392 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002393 *this = x86::pmovzxbd(As<Byte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002394 }
2395 else
Logan Chiene3191012018-08-24 22:01:50 +08002396#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002397 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002398 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
Nicolas Capens01a97962017-07-28 17:30:51 -04002399 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002400 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002401
Nicolas Capense89cd582016-09-30 14:23:47 -04002402 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002403 Value *c = Nucleus::createBitCast(b, Short8::getType());
Logan Chien191b3052018-08-31 16:57:15 +08002404 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002405
Nicolas Capens01a97962017-07-28 17:30:51 -04002406 *this = As<Int4>(d);
2407 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002408 }
2409
Nicolas Capenscb986762017-01-20 11:34:37 -05002410 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002411 {
Logan Chiene3191012018-08-24 22:01:50 +08002412#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens01a97962017-07-28 17:30:51 -04002413 if(CPUID::supportsSSE4_1())
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002414 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002415 *this = x86::pmovsxbd(As<SByte16>(cast));
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002416 }
2417 else
Logan Chiene3191012018-08-24 22:01:50 +08002418#endif
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002419 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002420 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2421 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
2422 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002423
Nicolas Capense89cd582016-09-30 14:23:47 -04002424 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002425 Value *c = Nucleus::createBitCast(b, Short8::getType());
2426 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002427
Nicolas Capens01a97962017-07-28 17:30:51 -04002428 *this = As<Int4>(d) >> 24;
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002429 }
Meng-Lin Wu601d0052016-06-10 14:18:41 -04002430 }
2431
Nicolas Capenscb986762017-01-20 11:34:37 -05002432 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002433 {
Logan Chiene3191012018-08-24 22:01:50 +08002434#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002435 if(CPUID::supportsSSE4_1())
2436 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002437 *this = x86::pmovsxwd(As<Short8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002438 }
2439 else
Logan Chiene3191012018-08-24 22:01:50 +08002440#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002441 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002442 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Nicolas Capens01a97962017-07-28 17:30:51 -04002443 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2444 *this = As<Int4>(c) >> 16;
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002445 }
2446 }
2447
Nicolas Capenscb986762017-01-20 11:34:37 -05002448 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002449 {
Logan Chiene3191012018-08-24 22:01:50 +08002450#if defined(__i386__) || defined(__x86_64__)
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002451 if(CPUID::supportsSSE4_1())
2452 {
Nicolas Capens01a97962017-07-28 17:30:51 -04002453 *this = x86::pmovzxwd(As<UShort8>(cast));
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002454 }
2455 else
Logan Chiene3191012018-08-24 22:01:50 +08002456#endif
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002457 {
Nicolas Capense89cd582016-09-30 14:23:47 -04002458 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
Nicolas Capens01a97962017-07-28 17:30:51 -04002459 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2460 *this = As<Int4>(c);
Alexis Hetu2aa852f2015-10-14 16:32:39 -04002461 }
2462 }
2463
Nicolas Capenscb986762017-01-20 11:34:37 -05002464 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002465 {
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002466 Value *vector = loadValue();
2467 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2468
Nicolas Capense89cd582016-09-30 14:23:47 -04002469 int swizzle[4] = {0, 0, 0, 0};
2470 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
Nicolas Capens24c8cf02016-08-15 15:33:14 -04002471
2472 storeValue(replicate);
2473 }
2474
John Bauman19bac1e2014-05-06 15:23:49 -04002475 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002476 {
Logan Chiene3191012018-08-24 22:01:50 +08002477#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002478 return x86::pslld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002479#else
2480 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
2481#endif
John Bauman89401822014-05-06 15:04:28 -04002482 }
2483
John Bauman19bac1e2014-05-06 15:23:49 -04002484 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002485 {
Logan Chiene3191012018-08-24 22:01:50 +08002486#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002487 return x86::psrad(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002488#else
2489 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
2490#endif
John Bauman89401822014-05-06 15:04:28 -04002491 }
2492
John Bauman19bac1e2014-05-06 15:23:49 -04002493 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2494 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002495 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002496 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2497 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2498 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002499 }
2500
2501 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2502 {
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002503 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2504 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2505 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
2506 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002507 }
2508
2509 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2510 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002511 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2512 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2513 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
2514 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002515 }
2516
2517 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2518 {
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002519 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2520 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2521 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2522 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002523 }
2524
2525 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2526 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002527 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2528 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2529 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
2530 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002531 }
2532
2533 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2534 {
Nicolas Capens9ae6cfd2017-11-27 14:58:53 -05002535 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2536 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2537 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
2538 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002539 }
2540
2541 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2542 {
Logan Chiene3191012018-08-24 22:01:50 +08002543#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002544 if(CPUID::supportsSSE4_1())
2545 {
2546 return x86::pmaxsd(x, y);
2547 }
2548 else
Logan Chiene3191012018-08-24 22:01:50 +08002549#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002550 {
2551 RValue<Int4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002552 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002553 }
2554 }
2555
2556 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2557 {
Logan Chiene3191012018-08-24 22:01:50 +08002558#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002559 if(CPUID::supportsSSE4_1())
2560 {
2561 return x86::pminsd(x, y);
2562 }
2563 else
Logan Chiene3191012018-08-24 22:01:50 +08002564#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002565 {
2566 RValue<Int4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002567 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002568 }
2569 }
2570
2571 RValue<Int4> RoundInt(RValue<Float4> cast)
John Bauman89401822014-05-06 15:04:28 -04002572 {
Logan Chiene3191012018-08-24 22:01:50 +08002573#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002574 return x86::cvtps2dq(cast);
Logan Chiene3191012018-08-24 22:01:50 +08002575#else
Logan Chien2faa24a2018-09-26 19:59:32 +08002576 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
Logan Chiene3191012018-08-24 22:01:50 +08002577#endif
John Bauman89401822014-05-06 15:04:28 -04002578 }
2579
Chris Forbese86b6dc2019-03-01 09:08:47 -08002580 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2581 {
2582 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2583 return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
2584 }
2585
2586 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2587 {
2588 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2589 return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
2590 }
2591
Nicolas Capens33438a62017-09-27 11:47:35 -04002592 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04002593 {
Logan Chiene3191012018-08-24 22:01:50 +08002594#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002595 return x86::packssdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002596#else
2597 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
2598#endif
John Bauman89401822014-05-06 15:04:28 -04002599 }
2600
Nicolas Capens33438a62017-09-27 11:47:35 -04002601 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
2602 {
Logan Chiene3191012018-08-24 22:01:50 +08002603#if defined(__i386__) || defined(__x86_64__)
Nicolas Capens33438a62017-09-27 11:47:35 -04002604 return x86::packusdw(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002605#else
2606 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
2607#endif
Nicolas Capens33438a62017-09-27 11:47:35 -04002608 }
2609
John Bauman19bac1e2014-05-06 15:23:49 -04002610 RValue<Int> SignMask(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04002611 {
Logan Chiene3191012018-08-24 22:01:50 +08002612#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002613 return x86::movmskps(As<Float4>(x));
Logan Chiene3191012018-08-24 22:01:50 +08002614#else
2615 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2616#endif
John Bauman89401822014-05-06 15:04:28 -04002617 }
2618
John Bauman19bac1e2014-05-06 15:23:49 -04002619 Type *Int4::getType()
John Bauman89401822014-05-06 15:04:28 -04002620 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002621 return T(llvm::VectorType::get(T(Int::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002622 }
2623
Nicolas Capenscb986762017-01-20 11:34:37 -05002624 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002625 {
Alexis Hetu764d1422016-09-28 08:44:22 -04002626 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
2627 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
John Bauman89401822014-05-06 15:04:28 -04002628
Alexis Hetu764d1422016-09-28 08:44:22 -04002629 // Smallest positive value representable in UInt, but not in Int
2630 const unsigned int ustart = 0x80000000u;
2631 const float ustartf = float(ustart);
2632
2633 // Check if the value can be represented as an Int
2634 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
2635 // If the value is too large, subtract ustart and re-add it after conversion.
2636 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
2637 // Otherwise, just convert normally
2638 (~uiValue & Int4(cast));
2639 // If the value is negative, store 0, otherwise store the result of the conversion
2640 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
John Bauman89401822014-05-06 15:04:28 -04002641 }
2642
John Bauman19bac1e2014-05-06 15:23:49 -04002643 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002644 {
Logan Chiene3191012018-08-24 22:01:50 +08002645#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002646 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
Logan Chiene3191012018-08-24 22:01:50 +08002647#else
2648 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
2649#endif
John Bauman89401822014-05-06 15:04:28 -04002650 }
2651
John Bauman19bac1e2014-05-06 15:23:49 -04002652 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
John Bauman89401822014-05-06 15:04:28 -04002653 {
Logan Chiene3191012018-08-24 22:01:50 +08002654#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002655 return x86::psrld(lhs, rhs);
Logan Chiene3191012018-08-24 22:01:50 +08002656#else
2657 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
2658#endif
John Bauman89401822014-05-06 15:04:28 -04002659 }
2660
John Bauman19bac1e2014-05-06 15:23:49 -04002661 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
2662 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002663 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
Alexis Hetufb603992016-04-26 11:50:40 -04002664 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2665 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
2666 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002667 }
2668
2669 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
2670 {
2671 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
2672 }
2673
2674 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
2675 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002676 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2677 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2678 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
2679 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002680 }
2681
2682 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
2683 {
2684 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
2685 }
2686
2687 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
2688 {
Nicolas Capens197226a2016-04-27 23:08:50 -04002689 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
2690 // Restore the following line when LLVM is updated to a version where this issue is fixed.
2691 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
2692 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
John Bauman19bac1e2014-05-06 15:23:49 -04002693 }
2694
2695 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
2696 {
2697 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
2698 }
2699
2700 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
2701 {
Logan Chiene3191012018-08-24 22:01:50 +08002702#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002703 if(CPUID::supportsSSE4_1())
2704 {
2705 return x86::pmaxud(x, y);
2706 }
2707 else
Logan Chiene3191012018-08-24 22:01:50 +08002708#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002709 {
2710 RValue<UInt4> greater = CmpNLE(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002711 return (x & greater) | (y & ~greater);
John Bauman19bac1e2014-05-06 15:23:49 -04002712 }
2713 }
2714
2715 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
2716 {
Logan Chiene3191012018-08-24 22:01:50 +08002717#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002718 if(CPUID::supportsSSE4_1())
2719 {
2720 return x86::pminud(x, y);
2721 }
2722 else
Logan Chiene3191012018-08-24 22:01:50 +08002723#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002724 {
2725 RValue<UInt4> less = CmpLT(x, y);
Tom Anderson69bc6e82017-03-20 11:54:29 -07002726 return (x & less) | (y & ~less);
John Bauman19bac1e2014-05-06 15:23:49 -04002727 }
2728 }
2729
John Bauman19bac1e2014-05-06 15:23:49 -04002730 Type *UInt4::getType()
John Bauman89401822014-05-06 15:04:28 -04002731 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002732 return T(llvm::VectorType::get(T(UInt::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04002733 }
2734
Alexis Hetu734e2572018-12-20 14:00:49 -05002735 Type *Half::getType()
2736 {
2737 return T(llvm::Type::getInt16Ty(*::context));
2738 }
2739
Nicolas Capens05b3d662016-02-25 23:58:33 -05002740 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04002741 {
Logan Chiene3191012018-08-24 22:01:50 +08002742#if defined(__i386__) || defined(__x86_64__)
2743 if(exactAtPow2)
2744 {
2745 // rcpss uses a piecewise-linear approximation which minimizes the relative error
2746 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
2747 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
2748 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04002749 return x86::rcpss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002750#else
2751 return As<Float>(V(lowerRCP(V(x.value))));
2752#endif
John Bauman89401822014-05-06 15:04:28 -04002753 }
John Bauman66b8ab22014-05-06 15:57:45 -04002754
John Bauman19bac1e2014-05-06 15:23:49 -04002755 RValue<Float> RcpSqrt_pp(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002756 {
Logan Chiene3191012018-08-24 22:01:50 +08002757#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002758 return x86::rsqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002759#else
2760 return As<Float>(V(lowerRSQRT(V(x.value))));
2761#endif
John Bauman89401822014-05-06 15:04:28 -04002762 }
2763
John Bauman19bac1e2014-05-06 15:23:49 -04002764 RValue<Float> Sqrt(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002765 {
Logan Chiene3191012018-08-24 22:01:50 +08002766#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002767 return x86::sqrtss(x);
Logan Chiene3191012018-08-24 22:01:50 +08002768#else
2769 return As<Float>(V(lowerSQRT(V(x.value))));
2770#endif
John Bauman89401822014-05-06 15:04:28 -04002771 }
2772
John Bauman19bac1e2014-05-06 15:23:49 -04002773 RValue<Float> Round(RValue<Float> x)
2774 {
Logan Chiene3191012018-08-24 22:01:50 +08002775#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002776 if(CPUID::supportsSSE4_1())
2777 {
2778 return x86::roundss(x, 0);
2779 }
2780 else
2781 {
2782 return Float4(Round(Float4(x))).x;
2783 }
Logan Chien83fc07a2018-09-26 22:14:00 +08002784#else
2785 return RValue<Float>(V(lowerRound(V(x.value))));
2786#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002787 }
2788
2789 RValue<Float> Trunc(RValue<Float> x)
2790 {
Logan Chiene3191012018-08-24 22:01:50 +08002791#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002792 if(CPUID::supportsSSE4_1())
2793 {
2794 return x86::roundss(x, 3);
2795 }
2796 else
2797 {
2798 return Float(Int(x)); // Rounded toward zero
2799 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08002800#else
2801 return RValue<Float>(V(lowerTrunc(V(x.value))));
2802#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002803 }
2804
2805 RValue<Float> Frac(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002806 {
Logan Chiene3191012018-08-24 22:01:50 +08002807#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002808 if(CPUID::supportsSSE4_1())
2809 {
2810 return x - x86::floorss(x);
2811 }
2812 else
2813 {
John Bauman19bac1e2014-05-06 15:23:49 -04002814 return Float4(Frac(Float4(x))).x;
John Bauman89401822014-05-06 15:04:28 -04002815 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08002816#else
2817 // x - floor(x) can be 1.0 for very small negative x.
2818 // Clamp against the value just below 1.0.
2819 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
2820#endif
John Bauman89401822014-05-06 15:04:28 -04002821 }
2822
John Bauman19bac1e2014-05-06 15:23:49 -04002823 RValue<Float> Floor(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002824 {
Logan Chiene3191012018-08-24 22:01:50 +08002825#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002826 if(CPUID::supportsSSE4_1())
2827 {
2828 return x86::floorss(x);
2829 }
2830 else
2831 {
2832 return Float4(Floor(Float4(x))).x;
2833 }
Logan Chien40a60052018-09-26 19:03:53 +08002834#else
2835 return RValue<Float>(V(lowerFloor(V(x.value))));
2836#endif
John Bauman89401822014-05-06 15:04:28 -04002837 }
2838
John Bauman19bac1e2014-05-06 15:23:49 -04002839 RValue<Float> Ceil(RValue<Float> x)
John Bauman89401822014-05-06 15:04:28 -04002840 {
Logan Chiene3191012018-08-24 22:01:50 +08002841#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04002842 if(CPUID::supportsSSE4_1())
2843 {
2844 return x86::ceilss(x);
2845 }
2846 else
Logan Chiene3191012018-08-24 22:01:50 +08002847#endif
John Bauman19bac1e2014-05-06 15:23:49 -04002848 {
2849 return Float4(Ceil(Float4(x))).x;
2850 }
John Bauman89401822014-05-06 15:04:28 -04002851 }
2852
John Bauman19bac1e2014-05-06 15:23:49 -04002853 Type *Float::getType()
John Bauman89401822014-05-06 15:04:28 -04002854 {
Nicolas Capensac230122016-09-20 14:30:06 -04002855 return T(llvm::Type::getFloatTy(*::context));
John Bauman89401822014-05-06 15:04:28 -04002856 }
2857
John Bauman19bac1e2014-05-06 15:23:49 -04002858 Type *Float2::getType()
John Bauman89401822014-05-06 15:04:28 -04002859 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04002860 return T(Type_v2f32);
John Bauman89401822014-05-06 15:04:28 -04002861 }
2862
Nicolas Capenscb986762017-01-20 11:34:37 -05002863 Float4::Float4(RValue<Float> rhs) : XYZW(this)
John Bauman89401822014-05-06 15:04:28 -04002864 {
John Bauman66b8ab22014-05-06 15:57:45 -04002865 Value *vector = loadValue();
John Bauman89401822014-05-06 15:04:28 -04002866 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
2867
Nicolas Capense89cd582016-09-30 14:23:47 -04002868 int swizzle[4] = {0, 0, 0, 0};
2869 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
John Bauman89401822014-05-06 15:04:28 -04002870
John Bauman66b8ab22014-05-06 15:57:45 -04002871 storeValue(replicate);
John Bauman89401822014-05-06 15:04:28 -04002872 }
2873
John Bauman19bac1e2014-05-06 15:23:49 -04002874 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002875 {
Logan Chiene3191012018-08-24 22:01:50 +08002876#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002877 return x86::maxps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002878#else
2879 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
2880#endif
John Bauman89401822014-05-06 15:04:28 -04002881 }
2882
John Bauman19bac1e2014-05-06 15:23:49 -04002883 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002884 {
Logan Chiene3191012018-08-24 22:01:50 +08002885#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002886 return x86::minps(x, y);
Logan Chiene3191012018-08-24 22:01:50 +08002887#else
2888 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
2889#endif
John Bauman89401822014-05-06 15:04:28 -04002890 }
2891
Nicolas Capens05b3d662016-02-25 23:58:33 -05002892 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
John Bauman89401822014-05-06 15:04:28 -04002893 {
Logan Chiene3191012018-08-24 22:01:50 +08002894#if defined(__i386__) || defined(__x86_64__)
2895 if(exactAtPow2)
2896 {
2897 // rcpps uses a piecewise-linear approximation which minimizes the relative error
2898 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
2899 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
2900 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04002901 return x86::rcpps(x);
Logan Chiene3191012018-08-24 22:01:50 +08002902#else
2903 return As<Float4>(V(lowerRCP(V(x.value))));
2904#endif
John Bauman89401822014-05-06 15:04:28 -04002905 }
John Bauman66b8ab22014-05-06 15:57:45 -04002906
John Bauman19bac1e2014-05-06 15:23:49 -04002907 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04002908 {
Logan Chiene3191012018-08-24 22:01:50 +08002909#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002910 return x86::rsqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08002911#else
2912 return As<Float4>(V(lowerRSQRT(V(x.value))));
2913#endif
John Bauman89401822014-05-06 15:04:28 -04002914 }
2915
John Bauman19bac1e2014-05-06 15:23:49 -04002916 RValue<Float4> Sqrt(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04002917 {
Logan Chiene3191012018-08-24 22:01:50 +08002918#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002919 return x86::sqrtps(x);
Logan Chiene3191012018-08-24 22:01:50 +08002920#else
2921 return As<Float4>(V(lowerSQRT(V(x.value))));
2922#endif
John Bauman89401822014-05-06 15:04:28 -04002923 }
2924
John Bauman19bac1e2014-05-06 15:23:49 -04002925 RValue<Int> SignMask(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04002926 {
Logan Chiene3191012018-08-24 22:01:50 +08002927#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04002928 return x86::movmskps(x);
Logan Chiene3191012018-08-24 22:01:50 +08002929#else
2930 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
2931#endif
John Bauman89401822014-05-06 15:04:28 -04002932 }
2933
John Bauman19bac1e2014-05-06 15:23:49 -04002934 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002935 {
2936 // return As<Int4>(x86::cmpeqps(x, y));
2937 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
2938 }
2939
John Bauman19bac1e2014-05-06 15:23:49 -04002940 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002941 {
2942 // return As<Int4>(x86::cmpltps(x, y));
2943 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
2944 }
2945
John Bauman19bac1e2014-05-06 15:23:49 -04002946 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002947 {
2948 // return As<Int4>(x86::cmpleps(x, y));
2949 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
2950 }
2951
John Bauman19bac1e2014-05-06 15:23:49 -04002952 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002953 {
2954 // return As<Int4>(x86::cmpneqps(x, y));
2955 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
2956 }
2957
John Bauman19bac1e2014-05-06 15:23:49 -04002958 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002959 {
2960 // return As<Int4>(x86::cmpnltps(x, y));
2961 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
2962 }
2963
John Bauman19bac1e2014-05-06 15:23:49 -04002964 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04002965 {
2966 // return As<Int4>(x86::cmpnleps(x, y));
2967 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
2968 }
2969
Ben Claytonec1aeb82019-03-04 19:33:27 +00002970 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
2971 {
2972 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUEQ(x.value, y.value), Int4::getType()));
2973 }
2974
2975 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
2976 {
2977 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULT(x.value, y.value), Int4::getType()));
2978 }
2979
2980 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
2981 {
2982 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpULE(x.value, y.value), Int4::getType()));
2983 }
2984
2985 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
2986 {
2987 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUNE(x.value, y.value), Int4::getType()));
2988 }
2989
2990 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
2991 {
2992 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGE(x.value, y.value), Int4::getType()));
2993 }
2994
2995 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
2996 {
2997 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpUGT(x.value, y.value), Int4::getType()));
2998 }
2999
John Bauman19bac1e2014-05-06 15:23:49 -04003000 RValue<Float4> Round(RValue<Float4> x)
3001 {
Logan Chiene3191012018-08-24 22:01:50 +08003002#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003003 if(CPUID::supportsSSE4_1())
3004 {
3005 return x86::roundps(x, 0);
3006 }
3007 else
3008 {
3009 return Float4(RoundInt(x));
3010 }
Logan Chien83fc07a2018-09-26 22:14:00 +08003011#else
3012 return RValue<Float4>(V(lowerRound(V(x.value))));
3013#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003014 }
3015
3016 RValue<Float4> Trunc(RValue<Float4> x)
3017 {
Logan Chiene3191012018-08-24 22:01:50 +08003018#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003019 if(CPUID::supportsSSE4_1())
3020 {
3021 return x86::roundps(x, 3);
3022 }
3023 else
3024 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003025 return Float4(Int4(x));
John Bauman19bac1e2014-05-06 15:23:49 -04003026 }
Logan Chien8c5ca8d2018-09-27 21:05:53 +08003027#else
3028 return RValue<Float4>(V(lowerTrunc(V(x.value))));
3029#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003030 }
3031
3032 RValue<Float4> Frac(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003033 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003034 Float4 frc;
3035
Logan Chien40a60052018-09-26 19:03:53 +08003036#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003037 if(CPUID::supportsSSE4_1())
3038 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003039 frc = x - Floor(x);
John Bauman89401822014-05-06 15:04:28 -04003040 }
3041 else
3042 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003043 frc = x - Float4(Int4(x)); // Signed fractional part.
John Bauman89401822014-05-06 15:04:28 -04003044
Nicolas Capensb9230422017-07-17 10:27:33 -04003045 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
John Bauman89401822014-05-06 15:04:28 -04003046 }
Logan Chien3c6a1ae2018-09-26 22:18:16 +08003047#else
3048 frc = x - Floor(x);
3049#endif
Nicolas Capensb9230422017-07-17 10:27:33 -04003050
3051 // x - floor(x) can be 1.0 for very small negative x.
3052 // Clamp against the value just below 1.0.
3053 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
John Bauman89401822014-05-06 15:04:28 -04003054 }
3055
John Bauman19bac1e2014-05-06 15:23:49 -04003056 RValue<Float4> Floor(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003057 {
Logan Chiene3191012018-08-24 22:01:50 +08003058#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003059 if(CPUID::supportsSSE4_1())
3060 {
3061 return x86::floorps(x);
3062 }
3063 else
3064 {
John Bauman19bac1e2014-05-06 15:23:49 -04003065 return x - Frac(x);
John Bauman89401822014-05-06 15:04:28 -04003066 }
Logan Chien40a60052018-09-26 19:03:53 +08003067#else
3068 return RValue<Float4>(V(lowerFloor(V(x.value))));
3069#endif
John Bauman89401822014-05-06 15:04:28 -04003070 }
3071
John Bauman19bac1e2014-05-06 15:23:49 -04003072 RValue<Float4> Ceil(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003073 {
Logan Chiene3191012018-08-24 22:01:50 +08003074#if defined(__i386__) || defined(__x86_64__)
John Bauman19bac1e2014-05-06 15:23:49 -04003075 if(CPUID::supportsSSE4_1())
3076 {
3077 return x86::ceilps(x);
3078 }
3079 else
Logan Chiene3191012018-08-24 22:01:50 +08003080#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003081 {
3082 return -Floor(-x);
3083 }
John Bauman89401822014-05-06 15:04:28 -04003084 }
3085
Ben Claytona2c8b772019-04-09 13:42:36 -04003086 RValue<Float4> Sin(RValue<Float4> v)
3087 {
3088 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sin, { V(v.value)->getType() } );
3089 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3090 }
3091
Ben Clayton1b6f8c72019-04-09 13:47:43 -04003092 RValue<Float4> Cos(RValue<Float4> v)
3093 {
3094 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::cos, { V(v.value)->getType() } );
3095 return RValue<Float4>(V(::builder->CreateCall(func, V(v.value))));
3096 }
3097
Ben Clayton14740062019-04-09 13:48:41 -04003098 RValue<Float4> Tan(RValue<Float4> v)
3099 {
3100 return Sin(v) / Cos(v);
3101 }
3102
Ben Claytoneafae472019-04-09 14:22:38 -04003103 static RValue<Float4> TransformFloat4PerElement(RValue<Float4> v, const char* name)
Ben Claytonf9350d72019-04-09 14:19:02 -04003104 {
3105 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()), {T(Float::getType())}, false);
Ben Claytoneafae472019-04-09 14:22:38 -04003106 auto func = ::module->getOrInsertFunction(name, funcTy);
Ben Claytonf9350d72019-04-09 14:19:02 -04003107 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3108 for (uint64_t i = 0; i < 4; i++)
3109 {
3110 auto el = ::builder->CreateCall(func, ::builder->CreateExtractElement(V(v.value), i));
3111 out = ::builder->CreateInsertElement(out, el, i);
3112 }
3113 return RValue<Float4>(V(out));
3114 }
3115
Ben Claytoneafae472019-04-09 14:22:38 -04003116 RValue<Float4> Asin(RValue<Float4> v)
3117 {
3118 return TransformFloat4PerElement(v, "asinf");
3119 }
3120
3121 RValue<Float4> Acos(RValue<Float4> v)
3122 {
3123 return TransformFloat4PerElement(v, "acosf");
3124 }
3125
Ben Clayton749b4e02019-04-09 14:27:43 -04003126 RValue<Float4> Atan(RValue<Float4> v)
3127 {
3128 return TransformFloat4PerElement(v, "atanf");
3129 }
3130
Ben Claytond9636972019-04-09 15:09:54 -04003131 RValue<Float4> Sinh(RValue<Float4> v)
3132 {
3133 return TransformFloat4PerElement(v, "sinhf");
3134 }
3135
Ben Clayton900ea2c2019-04-09 15:25:36 -04003136 RValue<Float4> Cosh(RValue<Float4> v)
3137 {
3138 return TransformFloat4PerElement(v, "coshf");
3139 }
3140
Ben Clayton3928bd92019-04-09 15:27:41 -04003141 RValue<Float4> Tanh(RValue<Float4> v)
3142 {
3143 return TransformFloat4PerElement(v, "tanhf");
3144 }
3145
Ben Claytonf6d77ab2019-04-09 15:30:04 -04003146 RValue<Float4> Asinh(RValue<Float4> v)
3147 {
3148 return TransformFloat4PerElement(v, "asinhf");
3149 }
3150
Ben Clayton28ebcb02019-04-09 15:33:38 -04003151 RValue<Float4> Acosh(RValue<Float4> v)
3152 {
3153 return TransformFloat4PerElement(v, "acoshf");
3154 }
3155
Ben Claytonfa6a5392019-04-09 15:35:24 -04003156 RValue<Float4> Atanh(RValue<Float4> v)
3157 {
3158 return TransformFloat4PerElement(v, "atanhf");
3159 }
3160
Ben Claytona520c3e2019-04-09 15:43:45 -04003161 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3162 {
3163 auto funcTy = ::llvm::FunctionType::get(T(Float::getType()),
3164 {T(Float::getType()), T(Float::getType())}, false);
3165 auto func = ::module->getOrInsertFunction("atan2f", funcTy);
3166 llvm::Value *out = ::llvm::UndefValue::get(T(Float4::getType()));
3167 for (uint64_t i = 0; i < 4; i++)
3168 {
3169 auto el = ::builder->CreateCall(func, {
3170 ::builder->CreateExtractElement(V(x.value), i),
3171 ::builder->CreateExtractElement(V(y.value), i),
3172 });
3173 out = ::builder->CreateInsertElement(out, el, i);
3174 }
3175 return RValue<Float4>(V(out));
3176 }
3177
Ben Claytonbfe94f02019-04-09 15:52:12 -04003178 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3179 {
3180 auto func = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::pow,
3181 { T(Float4::getType()), T(Float4::getType()) } );
3182 return RValue<Float4>(V(::builder->CreateCall(func, { V(x.value), V(y.value) })));
3183 }
3184
John Bauman19bac1e2014-05-06 15:23:49 -04003185 Type *Float4::getType()
John Bauman89401822014-05-06 15:04:28 -04003186 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003187 return T(llvm::VectorType::get(T(Float::getType()), 4));
John Bauman89401822014-05-06 15:04:28 -04003188 }
3189
John Bauman89401822014-05-06 15:04:28 -04003190 RValue<Long> Ticks()
3191 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003192 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
John Bauman89401822014-05-06 15:04:28 -04003193
Nicolas Capens2ab69ee2016-09-26 11:45:17 -04003194 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
John Bauman89401822014-05-06 15:04:28 -04003195 }
John Bauman89401822014-05-06 15:04:28 -04003196}
3197
Nicolas Capens48461502018-08-06 14:20:45 -04003198namespace rr
John Bauman89401822014-05-06 15:04:28 -04003199{
Logan Chiene3191012018-08-24 22:01:50 +08003200#if defined(__i386__) || defined(__x86_64__)
John Bauman89401822014-05-06 15:04:28 -04003201 namespace x86
3202 {
John Bauman19bac1e2014-05-06 15:23:49 -04003203 RValue<Int> cvtss2si(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003204 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003205 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
John Bauman66b8ab22014-05-06 15:57:45 -04003206
John Bauman89401822014-05-06 15:04:28 -04003207 Float4 vector;
3208 vector.x = val;
3209
Logan Chien813d5032018-08-31 17:19:45 +08003210 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
John Bauman89401822014-05-06 15:04:28 -04003211 }
3212
John Bauman19bac1e2014-05-06 15:23:49 -04003213 RValue<Int4> cvtps2dq(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003214 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003215 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
John Bauman89401822014-05-06 15:04:28 -04003216
Logan Chien813d5032018-08-31 17:19:45 +08003217 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003218 }
3219
John Bauman19bac1e2014-05-06 15:23:49 -04003220 RValue<Float> rcpss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003221 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003222 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
John Bauman89401822014-05-06 15:04:28 -04003223
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003224 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003225
Logan Chien813d5032018-08-31 17:19:45 +08003226 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003227 }
3228
John Bauman19bac1e2014-05-06 15:23:49 -04003229 RValue<Float> sqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003230 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003231#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003232 llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss);
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003233 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman66b8ab22014-05-06 15:57:45 -04003234
Logan Chien813d5032018-08-31 17:19:45 +08003235 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, ARGS(V(vector)))), Float::getType(), 0));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003236#else
3237 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3238 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
3239#endif
John Bauman89401822014-05-06 15:04:28 -04003240 }
3241
John Bauman19bac1e2014-05-06 15:23:49 -04003242 RValue<Float> rsqrtss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003243 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003244 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
John Bauman66b8ab22014-05-06 15:57:45 -04003245
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003246 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
John Bauman89401822014-05-06 15:04:28 -04003247
Logan Chien813d5032018-08-31 17:19:45 +08003248 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003249 }
3250
John Bauman19bac1e2014-05-06 15:23:49 -04003251 RValue<Float4> rcpps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003252 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003253 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003254
Logan Chien813d5032018-08-31 17:19:45 +08003255 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003256 }
3257
John Bauman19bac1e2014-05-06 15:23:49 -04003258 RValue<Float4> sqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003259 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003260#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003261 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps);
Logan Chien0eedc8c2018-08-21 09:34:28 +08003262#else
3263 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
3264#endif
John Bauman66b8ab22014-05-06 15:57:45 -04003265
Logan Chien813d5032018-08-31 17:19:45 +08003266 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003267 }
3268
John Bauman19bac1e2014-05-06 15:23:49 -04003269 RValue<Float4> rsqrtps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003270 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003271 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
John Bauman66b8ab22014-05-06 15:57:45 -04003272
Logan Chien813d5032018-08-31 17:19:45 +08003273 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
John Bauman89401822014-05-06 15:04:28 -04003274 }
3275
John Bauman19bac1e2014-05-06 15:23:49 -04003276 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003277 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003278 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
John Bauman89401822014-05-06 15:04:28 -04003279
Logan Chien813d5032018-08-31 17:19:45 +08003280 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003281 }
3282
John Bauman19bac1e2014-05-06 15:23:49 -04003283 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
John Bauman89401822014-05-06 15:04:28 -04003284 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003285 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
John Bauman89401822014-05-06 15:04:28 -04003286
Logan Chien813d5032018-08-31 17:19:45 +08003287 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003288 }
3289
John Bauman19bac1e2014-05-06 15:23:49 -04003290 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003291 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003292 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
John Bauman89401822014-05-06 15:04:28 -04003293
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003294 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
John Bauman89401822014-05-06 15:04:28 -04003295 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
3296
Logan Chien813d5032018-08-31 17:19:45 +08003297 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
John Bauman89401822014-05-06 15:04:28 -04003298 }
3299
John Bauman19bac1e2014-05-06 15:23:49 -04003300 RValue<Float> floorss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003301 {
3302 return roundss(val, 1);
3303 }
3304
John Bauman19bac1e2014-05-06 15:23:49 -04003305 RValue<Float> ceilss(RValue<Float> val)
John Bauman89401822014-05-06 15:04:28 -04003306 {
3307 return roundss(val, 2);
3308 }
3309
John Bauman19bac1e2014-05-06 15:23:49 -04003310 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
John Bauman89401822014-05-06 15:04:28 -04003311 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003312 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
John Bauman89401822014-05-06 15:04:28 -04003313
Logan Chien813d5032018-08-31 17:19:45 +08003314 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
John Bauman89401822014-05-06 15:04:28 -04003315 }
3316
John Bauman19bac1e2014-05-06 15:23:49 -04003317 RValue<Float4> floorps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003318 {
3319 return roundps(val, 1);
3320 }
3321
John Bauman19bac1e2014-05-06 15:23:49 -04003322 RValue<Float4> ceilps(RValue<Float4> val)
John Bauman89401822014-05-06 15:04:28 -04003323 {
3324 return roundps(val, 2);
3325 }
3326
Alexis Hetu0f448072016-03-18 10:56:08 -04003327 RValue<Int4> pabsd(RValue<Int4> x)
John Bauman89401822014-05-06 15:04:28 -04003328 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003329#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003330 llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
John Bauman89401822014-05-06 15:04:28 -04003331
Logan Chien813d5032018-08-31 17:19:45 +08003332 return RValue<Int4>(V(::builder->CreateCall(pabsd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003333#else
3334 return RValue<Int4>(V(lowerPABS(V(x.value))));
3335#endif
John Bauman89401822014-05-06 15:04:28 -04003336 }
3337
John Bauman19bac1e2014-05-06 15:23:49 -04003338 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003339 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003340 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
John Bauman89401822014-05-06 15:04:28 -04003341
Logan Chien813d5032018-08-31 17:19:45 +08003342 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003343 }
John Bauman66b8ab22014-05-06 15:57:45 -04003344
John Bauman19bac1e2014-05-06 15:23:49 -04003345 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003346 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003347 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
John Bauman89401822014-05-06 15:04:28 -04003348
Logan Chien813d5032018-08-31 17:19:45 +08003349 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003350 }
3351
John Bauman19bac1e2014-05-06 15:23:49 -04003352 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003353 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003354 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
John Bauman89401822014-05-06 15:04:28 -04003355
Logan Chien813d5032018-08-31 17:19:45 +08003356 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003357 }
John Bauman66b8ab22014-05-06 15:57:45 -04003358
John Bauman19bac1e2014-05-06 15:23:49 -04003359 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003360 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003361 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
John Bauman89401822014-05-06 15:04:28 -04003362
Logan Chien813d5032018-08-31 17:19:45 +08003363 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003364 }
3365
John Bauman19bac1e2014-05-06 15:23:49 -04003366 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003367 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003368 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
John Bauman89401822014-05-06 15:04:28 -04003369
Logan Chien813d5032018-08-31 17:19:45 +08003370 return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003371 }
John Bauman66b8ab22014-05-06 15:57:45 -04003372
John Bauman19bac1e2014-05-06 15:23:49 -04003373 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003374 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003375 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
John Bauman89401822014-05-06 15:04:28 -04003376
Logan Chien813d5032018-08-31 17:19:45 +08003377 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003378 }
John Bauman66b8ab22014-05-06 15:57:45 -04003379
John Bauman19bac1e2014-05-06 15:23:49 -04003380 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003381 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003382 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
John Bauman89401822014-05-06 15:04:28 -04003383
Logan Chien813d5032018-08-31 17:19:45 +08003384 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003385 }
John Bauman66b8ab22014-05-06 15:57:45 -04003386
John Bauman19bac1e2014-05-06 15:23:49 -04003387 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003388 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003389 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
John Bauman89401822014-05-06 15:04:28 -04003390
Logan Chien813d5032018-08-31 17:19:45 +08003391 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
John Bauman19bac1e2014-05-06 15:23:49 -04003392 }
3393
3394 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003395 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003396#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003397 llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
John Bauman89401822014-05-06 15:04:28 -04003398
Logan Chien813d5032018-08-31 17:19:45 +08003399 return As<UShort4>(V(::builder->CreateCall2(pavgw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003400#else
3401 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
3402#endif
John Bauman89401822014-05-06 15:04:28 -04003403 }
3404
John Bauman19bac1e2014-05-06 15:23:49 -04003405 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003406 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003407#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003408 llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
John Bauman89401822014-05-06 15:04:28 -04003409
Logan Chien813d5032018-08-31 17:19:45 +08003410 return As<Short4>(V(::builder->CreateCall2(pmaxsw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003411#else
3412 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
3413#endif
John Bauman89401822014-05-06 15:04:28 -04003414 }
3415
John Bauman19bac1e2014-05-06 15:23:49 -04003416 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003417 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003418#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003419 llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
John Bauman89401822014-05-06 15:04:28 -04003420
Logan Chien813d5032018-08-31 17:19:45 +08003421 return As<Short4>(V(::builder->CreateCall2(pminsw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003422#else
3423 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
3424#endif
John Bauman89401822014-05-06 15:04:28 -04003425 }
3426
John Bauman19bac1e2014-05-06 15:23:49 -04003427 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003428 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003429#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003430 llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
John Bauman89401822014-05-06 15:04:28 -04003431
Logan Chien813d5032018-08-31 17:19:45 +08003432 return As<Short4>(V(::builder->CreateCall2(pcmpgtw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003433#else
3434 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
3435#endif
John Bauman89401822014-05-06 15:04:28 -04003436 }
3437
John Bauman19bac1e2014-05-06 15:23:49 -04003438 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003439 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003440#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003441 llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
John Bauman89401822014-05-06 15:04:28 -04003442
Logan Chien813d5032018-08-31 17:19:45 +08003443 return As<Short4>(V(::builder->CreateCall2(pcmpeqw, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003444#else
3445 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
3446#endif
John Bauman89401822014-05-06 15:04:28 -04003447 }
3448
John Bauman19bac1e2014-05-06 15:23:49 -04003449 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
John Bauman89401822014-05-06 15:04:28 -04003450 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003451#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003452 llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
John Bauman89401822014-05-06 15:04:28 -04003453
Logan Chien813d5032018-08-31 17:19:45 +08003454 return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003455#else
3456 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
3457#endif
John Bauman89401822014-05-06 15:04:28 -04003458 }
3459
John Bauman19bac1e2014-05-06 15:23:49 -04003460 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
John Bauman89401822014-05-06 15:04:28 -04003461 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003462#if REACTOR_LLVM_VERSION < 7
Nicolas Capens01a97962017-07-28 17:30:51 -04003463 llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
John Bauman89401822014-05-06 15:04:28 -04003464
Logan Chien813d5032018-08-31 17:19:45 +08003465 return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003466#else
3467 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
3468#endif
John Bauman89401822014-05-06 15:04:28 -04003469 }
3470
John Bauman19bac1e2014-05-06 15:23:49 -04003471 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
John Bauman89401822014-05-06 15:04:28 -04003472 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003473 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003474
Logan Chien813d5032018-08-31 17:19:45 +08003475 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003476 }
3477
John Bauman19bac1e2014-05-06 15:23:49 -04003478 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003479 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003480 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
John Bauman89401822014-05-06 15:04:28 -04003481
Logan Chien813d5032018-08-31 17:19:45 +08003482 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003483 }
3484
John Bauman19bac1e2014-05-06 15:23:49 -04003485 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003486 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003487 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
John Bauman89401822014-05-06 15:04:28 -04003488
Logan Chien813d5032018-08-31 17:19:45 +08003489 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003490 }
3491
Nicolas Capens33438a62017-09-27 11:47:35 -04003492 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003493 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003494 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
John Bauman89401822014-05-06 15:04:28 -04003495
Logan Chien813d5032018-08-31 17:19:45 +08003496 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003497 }
3498
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003499 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
John Bauman89401822014-05-06 15:04:28 -04003500 {
3501 if(CPUID::supportsSSE4_1())
3502 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003503 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
John Bauman66b8ab22014-05-06 15:57:45 -04003504
Logan Chien813d5032018-08-31 17:19:45 +08003505 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003506 }
3507 else
3508 {
Nicolas Capens3e7062b2017-01-17 14:01:33 -05003509 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
3510 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
3511
3512 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
John Bauman89401822014-05-06 15:04:28 -04003513 }
3514 }
3515
John Bauman19bac1e2014-05-06 15:23:49 -04003516 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003517 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003518 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003519
Logan Chien813d5032018-08-31 17:19:45 +08003520 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003521 }
3522
John Bauman19bac1e2014-05-06 15:23:49 -04003523 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003524 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003525 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
John Bauman89401822014-05-06 15:04:28 -04003526
Logan Chien813d5032018-08-31 17:19:45 +08003527 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003528 }
3529
John Bauman19bac1e2014-05-06 15:23:49 -04003530 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003531 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003532 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003533
Logan Chien813d5032018-08-31 17:19:45 +08003534 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003535 }
3536
John Bauman19bac1e2014-05-06 15:23:49 -04003537 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003538 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003539 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
John Bauman89401822014-05-06 15:04:28 -04003540
Logan Chien813d5032018-08-31 17:19:45 +08003541 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003542 }
3543
John Bauman19bac1e2014-05-06 15:23:49 -04003544 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003545 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003546 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003547
Logan Chien813d5032018-08-31 17:19:45 +08003548 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003549 }
3550
John Bauman19bac1e2014-05-06 15:23:49 -04003551 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003552 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003553 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
John Bauman89401822014-05-06 15:04:28 -04003554
Logan Chien813d5032018-08-31 17:19:45 +08003555 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003556 }
3557
John Bauman19bac1e2014-05-06 15:23:49 -04003558 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003559 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003560 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003561
Logan Chien813d5032018-08-31 17:19:45 +08003562 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003563 }
3564
John Bauman19bac1e2014-05-06 15:23:49 -04003565 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003566 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003567 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
John Bauman89401822014-05-06 15:04:28 -04003568
Logan Chien813d5032018-08-31 17:19:45 +08003569 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003570 }
3571
John Bauman19bac1e2014-05-06 15:23:49 -04003572 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003573 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003574 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003575
Logan Chien813d5032018-08-31 17:19:45 +08003576 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003577 }
3578
John Bauman19bac1e2014-05-06 15:23:49 -04003579 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003580 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003581 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
John Bauman89401822014-05-06 15:04:28 -04003582
Logan Chien813d5032018-08-31 17:19:45 +08003583 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003584 }
3585
John Bauman19bac1e2014-05-06 15:23:49 -04003586 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003587 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003588 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003589
Logan Chien813d5032018-08-31 17:19:45 +08003590 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003591 }
3592
John Bauman19bac1e2014-05-06 15:23:49 -04003593 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
John Bauman89401822014-05-06 15:04:28 -04003594 {
Nicolas Capens9e013d42017-07-28 17:26:14 -04003595 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
John Bauman89401822014-05-06 15:04:28 -04003596
Logan Chien813d5032018-08-31 17:19:45 +08003597 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
John Bauman89401822014-05-06 15:04:28 -04003598 }
3599
John Bauman19bac1e2014-05-06 15:23:49 -04003600 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
3601 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003602#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003603 llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
John Bauman19bac1e2014-05-06 15:23:49 -04003604
Logan Chien813d5032018-08-31 17:19:45 +08003605 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003606#else
3607 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
3608#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003609 }
3610
3611 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
3612 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003613#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003614 llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
John Bauman19bac1e2014-05-06 15:23:49 -04003615
Logan Chien813d5032018-08-31 17:19:45 +08003616 return RValue<Int4>(V(::builder->CreateCall2(pminsd, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003617#else
3618 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
3619#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003620 }
3621
3622 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
3623 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003624#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003625 llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud);
John Bauman19bac1e2014-05-06 15:23:49 -04003626
Logan Chien813d5032018-08-31 17:19:45 +08003627 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003628#else
3629 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
3630#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003631 }
3632
3633 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
3634 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003635#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003636 llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud);
John Bauman19bac1e2014-05-06 15:23:49 -04003637
Logan Chien813d5032018-08-31 17:19:45 +08003638 return RValue<UInt4>(V(::builder->CreateCall2(pminud, ARGS(V(x.value), V(y.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003639#else
3640 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
3641#endif
John Bauman19bac1e2014-05-06 15:23:49 -04003642 }
3643
3644 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003645 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003646 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003647
Logan Chien813d5032018-08-31 17:19:45 +08003648 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003649 }
3650
John Bauman19bac1e2014-05-06 15:23:49 -04003651 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
John Bauman89401822014-05-06 15:04:28 -04003652 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003653 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04003654
Logan Chien813d5032018-08-31 17:19:45 +08003655 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003656 }
3657
John Bauman19bac1e2014-05-06 15:23:49 -04003658 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
John Bauman89401822014-05-06 15:04:28 -04003659 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003660 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04003661
Logan Chien813d5032018-08-31 17:19:45 +08003662 return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003663 }
3664
John Bauman19bac1e2014-05-06 15:23:49 -04003665 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04003666 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003667 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
John Bauman89401822014-05-06 15:04:28 -04003668
Logan Chien813d5032018-08-31 17:19:45 +08003669 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003670 }
3671
John Bauman19bac1e2014-05-06 15:23:49 -04003672 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
John Bauman89401822014-05-06 15:04:28 -04003673 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003674 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
John Bauman89401822014-05-06 15:04:28 -04003675
Logan Chien813d5032018-08-31 17:19:45 +08003676 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003677 }
3678
John Bauman19bac1e2014-05-06 15:23:49 -04003679 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
John Bauman89401822014-05-06 15:04:28 -04003680 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003681 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
John Bauman89401822014-05-06 15:04:28 -04003682
Logan Chien813d5032018-08-31 17:19:45 +08003683 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
John Bauman89401822014-05-06 15:04:28 -04003684 }
3685
John Bauman19bac1e2014-05-06 15:23:49 -04003686 RValue<Int> movmskps(RValue<Float4> x)
John Bauman89401822014-05-06 15:04:28 -04003687 {
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003688 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
John Bauman89401822014-05-06 15:04:28 -04003689
Logan Chien813d5032018-08-31 17:19:45 +08003690 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
John Bauman89401822014-05-06 15:04:28 -04003691 }
3692
John Bauman19bac1e2014-05-06 15:23:49 -04003693 RValue<Int> pmovmskb(RValue<Byte8> x)
John Bauman89401822014-05-06 15:04:28 -04003694 {
Nicolas Capens01a97962017-07-28 17:30:51 -04003695 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
John Bauman89401822014-05-06 15:04:28 -04003696
Logan Chien813d5032018-08-31 17:19:45 +08003697 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
John Bauman89401822014-05-06 15:04:28 -04003698 }
3699
Nicolas Capens01a97962017-07-28 17:30:51 -04003700 RValue<Int4> pmovzxbd(RValue<Byte16> x)
John Bauman89401822014-05-06 15:04:28 -04003701 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003702#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003703 llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
John Bauman66b8ab22014-05-06 15:57:45 -04003704
Logan Chien813d5032018-08-31 17:19:45 +08003705 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003706#else
3707 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
3708#endif
John Bauman89401822014-05-06 15:04:28 -04003709 }
3710
Nicolas Capens01a97962017-07-28 17:30:51 -04003711 RValue<Int4> pmovsxbd(RValue<SByte16> x)
John Bauman89401822014-05-06 15:04:28 -04003712 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003713#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003714 llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
John Bauman66b8ab22014-05-06 15:57:45 -04003715
Logan Chien813d5032018-08-31 17:19:45 +08003716 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003717#else
3718 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
3719#endif
John Bauman89401822014-05-06 15:04:28 -04003720 }
3721
Nicolas Capens01a97962017-07-28 17:30:51 -04003722 RValue<Int4> pmovzxwd(RValue<UShort8> x)
John Bauman89401822014-05-06 15:04:28 -04003723 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003724#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003725 llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
John Bauman66b8ab22014-05-06 15:57:45 -04003726
Logan Chien813d5032018-08-31 17:19:45 +08003727 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003728#else
3729 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
3730#endif
John Bauman89401822014-05-06 15:04:28 -04003731 }
3732
Nicolas Capens01a97962017-07-28 17:30:51 -04003733 RValue<Int4> pmovsxwd(RValue<Short8> x)
John Bauman89401822014-05-06 15:04:28 -04003734 {
Nicolas Capensf417d9d2018-10-10 10:49:30 -04003735#if REACTOR_LLVM_VERSION < 7
Nicolas Capensfbf2bc52017-07-26 17:26:17 -04003736 llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
John Bauman66b8ab22014-05-06 15:57:45 -04003737
Logan Chien813d5032018-08-31 17:19:45 +08003738 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, ARGS(V(x.value)))));
Logan Chien0eedc8c2018-08-21 09:34:28 +08003739#else
3740 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
3741#endif
John Bauman89401822014-05-06 15:04:28 -04003742 }
3743 }
Logan Chiene3191012018-08-24 22:01:50 +08003744#endif // defined(__i386__) || defined(__x86_64__)
Ben Clayton1bc7ee92019-02-14 18:43:22 +00003745
Ben Clayton60a3d6f2019-02-26 17:24:46 +00003746#ifdef ENABLE_RR_PRINT
Ben Clayton1bc7ee92019-02-14 18:43:22 +00003747 // extractAll returns a vector containing the extracted n scalar value of
3748 // the vector vec.
3749 static std::vector<Value*> extractAll(Value* vec, int n)
3750 {
3751 std::vector<Value*> elements;
3752 elements.reserve(n);
3753 for (int i = 0; i < n; i++)
3754 {
3755 auto el = V(::builder->CreateExtractElement(V(vec), i));
3756 elements.push_back(el);
3757 }
3758 return elements;
3759 }
3760
3761 // toDouble returns all the float values in vals extended to doubles.
3762 static std::vector<Value*> toDouble(const std::vector<Value*>& vals)
3763 {
3764 auto doubleTy = ::llvm::Type::getDoubleTy(*::context);
3765 std::vector<Value*> elements;
3766 elements.reserve(vals.size());
3767 for (auto v : vals)
3768 {
3769 elements.push_back(V(::builder->CreateFPExt(V(v), doubleTy)));
3770 }
3771 return elements;
3772 }
3773
3774 std::vector<Value*> PrintValue::Ty<Byte4>::val(const RValue<Byte4>& v) { return extractAll(v.value, 4); }
3775 std::vector<Value*> PrintValue::Ty<Int4>::val(const RValue<Int4>& v) { return extractAll(v.value, 4); }
3776 std::vector<Value*> PrintValue::Ty<UInt4>::val(const RValue<UInt4>& v) { return extractAll(v.value, 4); }
3777 std::vector<Value*> PrintValue::Ty<Short4>::val(const RValue<Short4>& v) { return extractAll(v.value, 4); }
3778 std::vector<Value*> PrintValue::Ty<UShort4>::val(const RValue<UShort4>& v) { return extractAll(v.value, 4); }
3779 std::vector<Value*> PrintValue::Ty<Float>::val(const RValue<Float>& v) { return toDouble({v.value}); }
3780 std::vector<Value*> PrintValue::Ty<Float4>::val(const RValue<Float4>& v) { return toDouble(extractAll(v.value, 4)); }
3781
3782 void Printv(const char* function, const char* file, int line, const char* fmt, std::initializer_list<PrintValue> args)
3783 {
3784 // LLVM types used below.
3785 auto i32Ty = ::llvm::Type::getInt32Ty(*::context);
3786 auto intTy = ::llvm::Type::getInt64Ty(*::context); // TODO: Natural int width.
3787 auto i8PtrTy = ::llvm::Type::getInt8PtrTy(*::context);
3788 auto funcTy = ::llvm::FunctionType::get(i32Ty, {i8PtrTy}, true);
3789
3790 auto func = ::module->getOrInsertFunction("printf", funcTy);
3791
3792 // Build the printf format message string.
3793 std::string str;
3794 if (file != nullptr) { str += (line > 0) ? "%s:%d " : "%s "; }
3795 if (function != nullptr) { str += "%s "; }
3796 str += fmt;
3797
3798 // Perform subsitution on all '{n}' bracketed indices in the format
3799 // message.
3800 int i = 0;
3801 for (const PrintValue& arg : args)
3802 {
3803 str = replace(str, "{" + std::to_string(i++) + "}", arg.format);
3804 }
3805
3806 ::llvm::SmallVector<::llvm::Value*, 8> vals;
3807
3808 // The format message is always the first argument.
3809 vals.push_back(::builder->CreateGlobalStringPtr(str));
3810
3811 // Add optional file, line and function info if provided.
3812 if (file != nullptr)
3813 {
3814 vals.push_back(::builder->CreateGlobalStringPtr(file));
3815 if (line > 0)
3816 {
3817 vals.push_back(::llvm::ConstantInt::get(intTy, line));
3818 }
3819 }
3820 if (function != nullptr)
3821 {
3822 vals.push_back(::builder->CreateGlobalStringPtr(function));
3823 }
3824
3825 // Add all format arguments.
3826 for (const PrintValue& arg : args)
3827 {
3828 for (auto val : arg.values)
3829 {
3830 vals.push_back(V(val));
3831 }
3832 }
3833
3834 ::builder->CreateCall(func, vals);
3835 }
3836#endif // ENABLE_RR_PRINT
3837
John Bauman89401822014-05-06 15:04:28 -04003838}