blob: e3d8e11cb608cb1bff44b382734fe2ddf544f5a5 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050017#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000018#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050019#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040020
Nicolas Capens1a3ce872018-10-10 10:42:36 -040021#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000022#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040023
Nicolas Capens598f8d82016-09-26 15:09:10 -040024#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfgNode.h"
26#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000027#include "src/IceELFStreamer.h"
28#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040029#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031
Ben Clayton713b8d32019-12-17 20:37:56 +000032#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040033#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040035
36#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000037# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040038#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040039
Nicolas Capensbd65da92017-01-05 16:31:06 -050040#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000041# ifndef WIN32_LEAN_AND_MEAN
42# define WIN32_LEAN_AND_MEAN
43# endif // !WIN32_LEAN_AND_MEAN
44# ifndef NOMINMAX
45# define NOMINMAX
46# endif // !NOMINMAX
47# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050048#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040049
Ben Clayton683bad82020-02-10 23:57:09 +000050#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040051#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000052#include <limits>
53#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040054
// Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050057namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050058namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050059void replaceEntryNode(Ice::Cfg *function, Ice::CfgNode *newEntryNode)
60{
61 ASSERT_MSG(function->getEntryNode() != nullptr, "Function should have an entry node");
62
63 if(function->getEntryNode() == newEntryNode)
64 {
65 return;
66 }
67
68 // Make this the new entry node
69 function->setEntryNode(newEntryNode);
70
71 // Reorder nodes so that new entry block comes first. This is required
72 // by Cfg::renumberInstructions, which expects the first node in the list
73 // to be the entry node.
74 {
75 auto nodes = function->getNodes();
76
77 // TODO(amaiorano): Fast path if newEntryNode is last? Can avoid linear search.
78
79 auto iter = std::find(nodes.begin(), nodes.end(), newEntryNode);
80 ASSERT_MSG(iter != nodes.end(), "New node should be in the function's node list");
81
82 nodes.erase(iter);
83 nodes.insert(nodes.begin(), newEntryNode);
84
85 // swapNodes replaces its nodes with the input one, and renumbers them,
86 // so our new entry node will be 0, and the previous will be 1.
87 function->swapNodes(nodes);
88 }
89}
90
91Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
92{
93 uint32_t sequenceNumber = 0;
94 auto function = Ice::Cfg::create(context, sequenceNumber).release();
95
96 Ice::CfgLocalAllocatorScope allocScope{ function };
97
98 for(auto type : paramTypes)
99 {
100 Ice::Variable *arg = function->makeVariable(type);
101 function->addArg(arg);
102 }
103
104 Ice::CfgNode *node = function->makeNode();
105 function->setEntryNode(node);
106
107 return function;
108}
109
110Ice::Type getPointerType(Ice::Type elementType)
111{
112 if(sizeof(void *) == 8)
113 {
114 return Ice::IceType_i64;
115 }
116 else
117 {
118 return Ice::IceType_i32;
119 }
120}
121
122Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
123{
124 int typeSize = Ice::typeWidthInBytes(type);
125 int totalSize = typeSize * (arraySize ? arraySize : 1);
126
127 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
128 auto address = function->makeVariable(getPointerType(type));
129 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
130 function->getEntryNode()->getInsts().push_front(alloca);
131
132 return address;
133}
134
135Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500136{
137 if(sizeof(void *) == 8)
138 {
139 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
140 }
141 else
142 {
143 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
144 }
145}
146
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500147// Wrapper for calls on C functions with Ice types
Antonio Maiorano62427e02020-02-13 09:18:05 -0500148Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500149{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500150 // Subzero doesn't support boolean return values. Replace with an i32.
151 if(retTy == Ice::IceType_i1)
152 {
153 retTy = Ice::IceType_i32;
154 }
155
156 Ice::Variable *ret = nullptr;
157 if(retTy != Ice::IceType_void)
158 {
159 ret = function->makeVariable(retTy);
160 }
161
Antonio Maiorano62427e02020-02-13 09:18:05 -0500162 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, getConstantPointer(function->getContext(), fptr), false);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500163 for(auto arg : iceArgs)
164 {
165 call->addArg(arg);
166 }
167
168 basicBlock->appendInst(call);
169 return ret;
170}
171
Antonio Maiorano62427e02020-02-13 09:18:05 -0500172// Wrapper for calls on C functions with Ice types
173template<typename Return, typename... CArgs, typename... RArgs>
174Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
175{
176 Ice::Type retTy = T(rr::CToReactorT<Return>::getType());
177 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
178 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs);
179}
180
Antonio Maiorano02a39532020-01-21 15:15:34 -0500181// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500182Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500183{
184 Ice::Variable *result = function->makeVariable(v->getType());
185 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
186 basicBlock->appendInst(cast);
187 return result;
188}
189
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500190Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500191{
192 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
193 // absolute address. We circumvent this by casting to a non-const variable, and loading
194 // from that.
195 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
196 {
197 ptr = sz::createUnconstCast(function, basicBlock, cptr);
198 }
199
200 Ice::Variable *result = function->makeVariable(type);
201 auto load = Ice::InstLoad::create(function, result, ptr, align);
202 basicBlock->appendInst(load);
203
204 return result;
205}
206
207} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500208} // namespace
209
Ben Clayton713b8d32019-12-17 20:37:56 +0000210namespace rr {
211class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500212class CoroutineGenerator;
213} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500214
215namespace {
216
217// Default configuration settings. Must be accessed under mutex lock.
218std::mutex defaultConfigLock;
219rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000220{
Nicolas Capens157ba262019-12-10 17:49:14 -0500221 // This uses a static in a function to avoid the cost of a global static
222 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
223 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000224 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500225 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000226}
227
Nicolas Capens157ba262019-12-10 17:49:14 -0500228Ice::GlobalContext *context = nullptr;
229Ice::Cfg *function = nullptr;
230Ice::CfgNode *basicBlock = nullptr;
231Ice::CfgLocalAllocatorScope *allocator = nullptr;
232rr::ELFMemoryStreamer *routine = nullptr;
233
234std::mutex codegenMutex;
235
236Ice::ELFFileStreamer *elfFile = nullptr;
237Ice::Fdstream *out = nullptr;
238
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500239// Coroutine globals
240rr::Type *coroYieldType = nullptr;
241std::shared_ptr<rr::CoroutineGenerator> coroGen;
242
Nicolas Capens157ba262019-12-10 17:49:14 -0500243} // Anonymous namespace
244
245namespace {
246
247#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +0000248# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500249#endif
250
Ben Clayton713b8d32019-12-17 20:37:56 +0000251#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
252# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500253#endif
254
Antonio Maiorano370cba52019-12-31 11:36:07 -0500255Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400256{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500257 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100258 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500259 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000260 case rr::Optimization::Level::None: return Ice::Opt_m1;
261 case rr::Optimization::Level::Less: return Ice::Opt_m1;
262 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500263 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
264 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100265 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500266 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400267}
268
Antonio Maiorano370cba52019-12-31 11:36:07 -0500269Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
270{
271 switch(memoryOrder)
272 {
273 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
274 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
275 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
276 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
277 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
278 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
279 }
280 return Ice::Intrinsics::MemoryOrderInvalid;
281}
282
// Host CPU detection. ARM is decided from compiler-defined architecture
// macros; SSE4_1 is queried with the CPUID instruction on x86 hosts.
class CPUID
{
public:
	const static bool ARM;
	const static bool SSE4_1;

private:
	// Runs CPUID with |info| in EAX and stores EAX, EBX, ECX, EDX into
	// |registers|. On non-x86 hosts all four entries are set to zero.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// True when compiled for 32-bit or 64-bit ARM; building for an
	// architecture not listed here is a compile error.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__)
		return false;
#elif defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// True when bit 19 of ECX for CPUID leaf 1 is set. Always false off x86.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		constexpr int sse41Mask = 1 << 19;  // Same value as 0x00080000.

		int registers[4];
		cpuid(registers, 1);
		return (registers[2] & sse41Mask) != 0;
#else
		return false;
#endif
	}
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
335const bool emulateIntrinsics = false;
336const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400337
Nicolas Capens157ba262019-12-10 17:49:14 -0500338constexpr bool subzeroDumpEnabled = false;
339constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500340
341#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500342static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
343static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500344#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500345
346} // anonymous namespace
347
348namespace rr {
349
// Returns the human-readable name of this Reactor backend.
std::string BackendName()
{
	return std::string("Subzero");
}
354
Ben Clayton713b8d32019-12-17 20:37:56 +0000355const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500356#if defined(_WIN32)
357 true, // CoroutinesSupported
358#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000359 false, // CoroutinesSupported
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500360#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500361};
362
363enum EmulatedType
364{
365 EmulatedShift = 16,
366 EmulatedV2 = 2 << EmulatedShift,
367 EmulatedV4 = 4 << EmulatedShift,
368 EmulatedV8 = 8 << EmulatedShift,
369 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
370
371 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
372 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
373 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000374 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
375 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500376 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
377};
378
Ben Clayton713b8d32019-12-17 20:37:56 +0000379class Value : public Ice::Operand
380{};
381class SwitchCases : public Ice::InstSwitch
382{};
383class BasicBlock : public Ice::CfgNode
384{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500385
386Ice::Type T(Type *t)
387{
388 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
389 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500390}
391
Nicolas Capens157ba262019-12-10 17:49:14 -0500392Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400393{
Ben Clayton713b8d32019-12-17 20:37:56 +0000394 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500395}
396
397Type *T(EmulatedType t)
398{
Ben Clayton713b8d32019-12-17 20:37:56 +0000399 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500400}
401
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500402std::vector<Ice::Type> T(const std::vector<Type *> &types)
403{
404 std::vector<Ice::Type> result;
405 result.reserve(types.size());
406 for(auto &t : types)
407 {
408 result.push_back(T(t));
409 }
410 return result;
411}
412
Nicolas Capens157ba262019-12-10 17:49:14 -0500413Value *V(Ice::Operand *v)
414{
Ben Clayton713b8d32019-12-17 20:37:56 +0000415 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500416}
417
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500418Ice::Operand *V(Value *v)
419{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500420 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500421}
422
Antonio Maiorano62427e02020-02-13 09:18:05 -0500423std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
424{
425 std::vector<Ice::Operand *> result;
426 result.reserve(values.size());
427 for(auto &v : values)
428 {
429 result.push_back(V(v));
430 }
431 return result;
432}
433
Nicolas Capens157ba262019-12-10 17:49:14 -0500434BasicBlock *B(Ice::CfgNode *b)
435{
Ben Clayton713b8d32019-12-17 20:37:56 +0000436 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500437}
438
439static size_t typeSize(Type *type)
440{
441 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400442 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500443 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800444 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000445 case Type_v2i32: return 8;
446 case Type_v4i16: return 8;
447 case Type_v2i16: return 4;
448 case Type_v8i8: return 8;
449 case Type_v4i8: return 4;
450 case Type_v2f32: return 8;
451 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500452 }
453 }
454
455 return Ice::typeWidthInBytes(T(type));
456}
457
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500458static void createRetVoidIfNoRet()
459{
460 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
461 {
462 Nucleus::createRetVoid();
463 }
464}
465
Ben Clayton713b8d32019-12-17 20:37:56 +0000466using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
467using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500468
469inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
470{
Ben Clayton713b8d32019-12-17 20:37:56 +0000471 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500472}
473
474inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
475{
476 return &sectionHeader(elfHeader)[index];
477}
478
479static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
480{
481 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
482
483 uint32_t index = relocation.getSymbol();
484 int table = relocationTable.sh_link;
485 void *symbolValue = nullptr;
486
487 if(index != SHN_UNDEF)
488 {
489 if(table == SHN_UNDEF) return nullptr;
490 const SectionHeader *symbolTable = elfSection(elfHeader, table);
491
492 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
493 if(index >= symtab_entries)
494 {
495 ASSERT(index < symtab_entries && "Symbol Index out of range");
496 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800497 }
498
Nicolas Capens157ba262019-12-10 17:49:14 -0500499 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000500 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500501 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800502
Nicolas Capens157ba262019-12-10 17:49:14 -0500503 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400504 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500505 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000506 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400507 }
508 else
509 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500510 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400511 }
Nicolas Capens66478362016-10-13 15:36:36 -0400512 }
513
Nicolas Capens157ba262019-12-10 17:49:14 -0500514 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000515 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500516
517 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400518 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400519 switch(relocation.getType())
520 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000521 case R_ARM_NONE:
522 // No relocation
523 break;
524 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500525 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000526 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500527 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
528 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
529 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400530 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000531 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500532 {
533 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
534 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
535 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400536 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000537 default:
538 ASSERT(false && "Unsupported relocation type");
539 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400540 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500541 }
542 else
543 {
544 switch(relocation.getType())
545 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000546 case R_386_NONE:
547 // No relocation
548 break;
549 case R_386_32:
550 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
551 break;
552 case R_386_PC32:
553 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
554 break;
555 default:
556 ASSERT(false && "Unsupported relocation type");
557 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500558 }
Nicolas Capens66478362016-10-13 15:36:36 -0400559 }
560
Nicolas Capens157ba262019-12-10 17:49:14 -0500561 return symbolValue;
562}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400563
Nicolas Capens157ba262019-12-10 17:49:14 -0500564static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
565{
566 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
567
568 uint32_t index = relocation.getSymbol();
569 int table = relocationTable.sh_link;
570 void *symbolValue = nullptr;
571
572 if(index != SHN_UNDEF)
573 {
574 if(table == SHN_UNDEF) return nullptr;
575 const SectionHeader *symbolTable = elfSection(elfHeader, table);
576
577 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
578 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400579 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500580 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400581 return nullptr;
582 }
583
Nicolas Capens157ba262019-12-10 17:49:14 -0500584 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000585 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500586 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400587
Nicolas Capens157ba262019-12-10 17:49:14 -0500588 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400589 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500590 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000591 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500592 }
593 else
594 {
595 return nullptr;
596 }
597 }
Nicolas Capens66478362016-10-13 15:36:36 -0400598
Nicolas Capens157ba262019-12-10 17:49:14 -0500599 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000600 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
601 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400602
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 switch(relocation.getType())
604 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000605 case R_X86_64_NONE:
606 // No relocation
607 break;
608 case R_X86_64_64:
609 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
610 break;
611 case R_X86_64_PC32:
612 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
613 break;
614 case R_X86_64_32S:
615 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
616 break;
617 default:
618 ASSERT(false && "Unsupported relocation type");
619 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500620 }
621
622 return symbolValue;
623}
624
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500625void *loadImage(uint8_t *const elfImage, size_t &codeSize, const char *functionName = nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -0500626{
Ben Clayton713b8d32019-12-17 20:37:56 +0000627 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500628
629 if(!elfHeader->checkMagic())
630 {
631 return nullptr;
632 }
633
634 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000635 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
636#if defined(__i386__)
637 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
638#elif defined(__x86_64__)
639 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
640#elif defined(__arm__)
641 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
642#elif defined(__aarch64__)
643 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
644#elif defined(__mips__)
645 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
646#else
647# error "Unsupported platform"
648#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500649
Ben Clayton713b8d32019-12-17 20:37:56 +0000650 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500651 void *entry = nullptr;
652
653 for(int i = 0; i < elfHeader->e_shnum; i++)
654 {
655 if(sectionHeader[i].sh_type == SHT_PROGBITS)
656 {
657 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
658 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500659 auto getCurrSectionName = [&]() {
660 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
661 return reinterpret_cast<const char *>(elfImage + sectionNameOffset);
662 };
663 if(functionName && strstr(getCurrSectionName(), functionName) == nullptr)
664 {
665 continue;
666 }
667
Nicolas Capens157ba262019-12-10 17:49:14 -0500668 entry = elfImage + sectionHeader[i].sh_offset;
669 codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400670 }
671 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500672 else if(sectionHeader[i].sh_type == SHT_REL)
673 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000674 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400675
Nicolas Capens157ba262019-12-10 17:49:14 -0500676 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
677 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000678 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500679 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
680 }
681 }
682 else if(sectionHeader[i].sh_type == SHT_RELA)
683 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000684 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500685
686 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
687 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000688 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500689 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
690 }
691 }
692 }
693
694 return entry;
695}
696
697template<typename T>
698struct ExecutableAllocator
699{
700 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000701 template<class U>
702 ExecutableAllocator(const ExecutableAllocator<U> &other)
703 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500704
705 using value_type = T;
706 using size_type = std::size_t;
707
708 T *allocate(size_type n)
709 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000710 return (T *)allocateMemoryPages(
711 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500712 }
713
714 void deallocate(T *p, size_type n)
715 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800716 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500717 }
718};
719
720class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
721{
722 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
723 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
724
725public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000726 ELFMemoryStreamer()
727 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500728 {
729 position = 0;
730 buffer.reserve(0x1000);
731 }
732
733 ~ELFMemoryStreamer() override
734 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500735 }
736
737 void write8(uint8_t Value) override
738 {
739 if(position == (uint64_t)buffer.size())
740 {
741 buffer.push_back(Value);
742 position++;
743 }
744 else if(position < (uint64_t)buffer.size())
745 {
746 buffer[position] = Value;
747 position++;
748 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000749 else
750 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500751 }
752
753 void writeBytes(llvm::StringRef Bytes) override
754 {
755 std::size_t oldSize = buffer.size();
756 buffer.resize(oldSize + Bytes.size());
757 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
758 position += Bytes.size();
759 }
760
761 uint64_t tell() const override { return position; }
762
763 void seek(uint64_t Off) override { position = Off; }
764
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500765 const void *getEntryByName(const char *name)
Nicolas Capens157ba262019-12-10 17:49:14 -0500766 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500767 size_t codeSize = 0;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500768 const void *entry = loadImage(&buffer[0], codeSize, name);
Nicolas Capens157ba262019-12-10 17:49:14 -0500769
770#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500771 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
772#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000773 __builtin___clear_cache((char *)entry, (char *)entry + codeSize);
Nicolas Capens157ba262019-12-10 17:49:14 -0500774#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500775
Nicolas Capens598f8d82016-09-26 15:09:10 -0400776 return entry;
777 }
778
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500779 void finalize()
780 {
781 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
782
783 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
784 }
785
Ben Clayton713b8d32019-12-17 20:37:56 +0000786 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400787 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500788 ASSERT(func);
789 funcs[index] = func;
790 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400791
Nicolas Capens157ba262019-12-10 17:49:14 -0500792 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400793 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500794 ASSERT(funcs[index]);
795 return funcs[index];
796 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400797
Antonio Maiorano02a39532020-01-21 15:15:34 -0500798 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500799 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500800 // TODO(b/148086935): Replace with a buffer allocator.
801 size_t space = size + alignment;
802 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
803 void *ptr = buf.get();
804 void *alignedPtr = std::align(alignment, size, ptr, space);
805 ASSERT(alignedPtr);
806 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500807 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500808 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500809 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400810
Nicolas Capens157ba262019-12-10 17:49:14 -0500811private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000812 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500813 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
814 std::size_t position;
815 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500816};
817
#ifdef ENABLE_RR_PRINT
// Emits a call to the C library's printf in the function currently being
// built, passing `vals` as the call arguments. The call is typed as returning
// a 32-bit integer.
void VPrintf(const std::vector<Value *> &vals)
{
	const void *printfAddress = reinterpret_cast<const void *>(::printf);
	sz::Call(::function, ::basicBlock, Ice::IceType_i32, printfAddress, V(vals));
}
#endif  // ENABLE_RR_PRINT
824
Nicolas Capens157ba262019-12-10 17:49:14 -0500825Nucleus::Nucleus()
826{
Ben Clayton713b8d32019-12-17 20:37:56 +0000827 ::codegenMutex.lock(); // Reactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500828
829 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
830 Ice::ClFlags::getParsedClFlags(Flags);
831
Ben Clayton713b8d32019-12-17 20:37:56 +0000832#if defined(__arm__)
833 Flags.setTargetArch(Ice::Target_ARM32);
834 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
835#elif defined(__mips__)
836 Flags.setTargetArch(Ice::Target_MIPS32);
837 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
838#else // x86
839 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
840 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
841#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500842 Flags.setOutFileType(Ice::FT_Elf);
843 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
844 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
845 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
846 Flags.setDisableHybridAssembly(true);
847
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500848 // Emit functions into separate sections in the ELF so we can find them by name
849 Flags.setFunctionSections(true);
850
Nicolas Capens157ba262019-12-10 17:49:14 -0500851 static llvm::raw_os_ostream cout(std::cout);
852 static llvm::raw_os_ostream cerr(std::cerr);
853
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500854 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500855 {
856 // Decorate text asm with liveness info
857 Flags.setDecorateAsm(true);
858 }
859
Ben Clayton713b8d32019-12-17 20:37:56 +0000860 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500861 {
862 std::error_code errorCode;
863 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
864 ::elfFile = new Ice::ELFFileStreamer(*out);
865 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
866 }
867 else
868 {
869 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
870 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
871 ::routine = elfMemory;
872 }
873}
874
875Nucleus::~Nucleus()
876{
877 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500878 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500879
880 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500881 ::allocator = nullptr;
882
Nicolas Capens157ba262019-12-10 17:49:14 -0500883 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500884 ::function = nullptr;
885
Nicolas Capens157ba262019-12-10 17:49:14 -0500886 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500887 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500888
889 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500890 ::elfFile = nullptr;
891
Nicolas Capens157ba262019-12-10 17:49:14 -0500892 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500893 ::out = nullptr;
894
895 ::basicBlock = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500896
897 ::codegenMutex.unlock();
898}
899
900void Nucleus::setDefaultConfig(const Config &cfg)
901{
902 std::unique_lock<std::mutex> lock(::defaultConfigLock);
903 ::defaultConfig() = cfg;
904}
905
906void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
907{
908 std::unique_lock<std::mutex> lock(::defaultConfigLock);
909 auto &config = ::defaultConfig();
910 config = cfgEdit.apply(config);
911}
912
913Config Nucleus::getDefaultConfig()
914{
915 std::unique_lock<std::mutex> lock(::defaultConfigLock);
916 return ::defaultConfig();
917}
918
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500919// This function lowers and produces executable binary code in memory for the input functions,
920// and returns a Routine with the entry points to these functions.
921template<size_t Count>
922static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500923{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500924 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
925 // and GlobalContext::emitItems.
926
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500927 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500928 {
929 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500930 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500931 }
932
933 ::context->emitFileHeader();
934
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500935 // Translate
936
937 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500938 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500939 Ice::Cfg *currFunc = functions[i];
940
941 // Install function allocator in TLS for Cfg-specific container allocators
942 Ice::CfgLocalAllocatorScope allocScope(currFunc);
943
944 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
945
946 rr::optimize(currFunc);
947
948 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500949 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500950
951 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500952 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500953
954 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
955
956 if(subzeroEmitTextAsm)
957 {
958 currFunc->emit();
959 }
960
961 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -0500962 }
963
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500964 // Emit items
965
966 ::context->lowerGlobals("");
967
Nicolas Capens157ba262019-12-10 17:49:14 -0500968 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500969
970 for(size_t i = 0; i < Count; ++i)
971 {
972 Ice::Cfg *currFunc = functions[i];
973
974 // Accumulate globals from functions to emit into the "last" section at the end
975 auto globals = currFunc->getGlobalInits();
976 if(globals && !globals->empty())
977 {
978 ::context->getGlobals()->merge(globals.get());
979 }
980
981 auto assembler = currFunc->releaseAssembler();
982 assembler->alignFunction();
983 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
984 }
985
Nicolas Capens157ba262019-12-10 17:49:14 -0500986 ::context->lowerGlobals("last");
987 ::context->lowerConstants();
988 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500989
Nicolas Capens157ba262019-12-10 17:49:14 -0500990 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500991 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -0500992 objectWriter->writeNonUserSections();
993
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500994 // Done compiling functions, get entry pointers to each of them
995 for(size_t i = 0; i < Count; ++i)
996 {
997 const void *entry = ::routine->getEntryByName(names[i]);
998 ::routine->setEntry(i, entry);
999 }
1000
1001 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001002
1003 Routine *handoffRoutine = ::routine;
1004 ::routine = nullptr;
1005
1006 return std::shared_ptr<Routine>(handoffRoutine);
1007}
1008
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001009std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1010{
1011 createRetVoidIfNoRet();
1012 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1013}
1014
Nicolas Capens157ba262019-12-10 17:49:14 -05001015Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1016{
1017 Ice::Type type = T(t);
1018 int typeSize = Ice::typeWidthInBytes(type);
1019 int totalSize = typeSize * (arraySize ? arraySize : 1);
1020
1021 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1022 auto address = ::function->makeVariable(T(getPointerType(t)));
1023 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1024 ::function->getEntryNode()->getInsts().push_front(alloca);
1025
1026 return V(address);
1027}
1028
1029BasicBlock *Nucleus::createBasicBlock()
1030{
1031 return B(::function->makeNode());
1032}
1033
1034BasicBlock *Nucleus::getInsertBlock()
1035{
1036 return B(::basicBlock);
1037}
1038
1039void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1040{
Ben Clayton713b8d32019-12-17 20:37:56 +00001041 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001042
1043 Variable::materializeAll();
1044
1045 ::basicBlock = basicBlock;
1046}
1047
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001048void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001049{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001050 ASSERT(::function == nullptr);
1051 ASSERT(::allocator == nullptr);
1052 ASSERT(::basicBlock == nullptr);
1053
1054 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1055
1056 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1057 // becomes invalid if another one is created; for example, when creating await and destroy functions
1058 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1059 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001060 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1061
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001062 ::basicBlock = ::function->getEntryNode();
Nicolas Capens157ba262019-12-10 17:49:14 -05001063}
1064
1065Value *Nucleus::getArgument(unsigned int index)
1066{
1067 return V(::function->getArgs()[index]);
1068}
1069
1070void Nucleus::createRetVoid()
1071{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001072 RR_DEBUG_INFO_UPDATE_LOC();
1073
Nicolas Capens157ba262019-12-10 17:49:14 -05001074 // Code generated after this point is unreachable, so any variables
1075 // being read can safely return an undefined value. We have to avoid
1076 // materializing variables after the terminator ret instruction.
1077 Variable::killUnmaterialized();
1078
1079 Ice::InstRet *ret = Ice::InstRet::create(::function);
1080 ::basicBlock->appendInst(ret);
1081}
1082
1083void Nucleus::createRet(Value *v)
1084{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001085 RR_DEBUG_INFO_UPDATE_LOC();
1086
Nicolas Capens157ba262019-12-10 17:49:14 -05001087 // Code generated after this point is unreachable, so any variables
1088 // being read can safely return an undefined value. We have to avoid
1089 // materializing variables after the terminator ret instruction.
1090 Variable::killUnmaterialized();
1091
1092 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1093 ::basicBlock->appendInst(ret);
1094}
1095
1096void Nucleus::createBr(BasicBlock *dest)
1097{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001098 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001099 Variable::materializeAll();
1100
1101 auto br = Ice::InstBr::create(::function, dest);
1102 ::basicBlock->appendInst(br);
1103}
1104
1105void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1106{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001107 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001108 Variable::materializeAll();
1109
1110 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1111 ::basicBlock->appendInst(br);
1112}
1113
1114static bool isCommutative(Ice::InstArithmetic::OpKind op)
1115{
1116 switch(op)
1117 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001118 case Ice::InstArithmetic::Add:
1119 case Ice::InstArithmetic::Fadd:
1120 case Ice::InstArithmetic::Mul:
1121 case Ice::InstArithmetic::Fmul:
1122 case Ice::InstArithmetic::And:
1123 case Ice::InstArithmetic::Or:
1124 case Ice::InstArithmetic::Xor:
1125 return true;
1126 default:
1127 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001128 }
1129}
1130
1131static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1132{
1133 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1134
1135 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1136
1137 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1138 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1139 ::basicBlock->appendInst(arithmetic);
1140
1141 return V(result);
1142}
1143
1144Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1145{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001146 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001147 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1148}
1149
1150Value *Nucleus::createSub(Value *lhs, Value *rhs)
1151{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001152 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001153 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1154}
1155
1156Value *Nucleus::createMul(Value *lhs, Value *rhs)
1157{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001158 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001159 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1160}
1161
1162Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1163{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001164 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001165 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1166}
1167
1168Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1169{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001170 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001171 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1172}
1173
1174Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1175{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001176 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001177 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1178}
1179
1180Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1181{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001182 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001183 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1184}
1185
1186Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1187{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001188 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001189 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1190}
1191
1192Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1193{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001194 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001195 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1196}
1197
1198Value *Nucleus::createURem(Value *lhs, Value *rhs)
1199{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001200 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001201 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1202}
1203
1204Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1205{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001206 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001207 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1208}
1209
1210Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1211{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001212 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001213 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1214 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001215 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001216 return nullptr;
1217}
1218
1219RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1220{
1221 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001222}
1223
1224Value *Nucleus::createShl(Value *lhs, Value *rhs)
1225{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001226 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001227 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1228}
1229
1230Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1231{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001232 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001233 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1234}
1235
1236Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1237{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001238 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001239 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1240}
1241
1242Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1243{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001244 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001245 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1246}
1247
1248Value *Nucleus::createOr(Value *lhs, Value *rhs)
1249{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001250 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001251 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1252}
1253
1254Value *Nucleus::createXor(Value *lhs, Value *rhs)
1255{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001256 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001257 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1258}
1259
1260Value *Nucleus::createNeg(Value *v)
1261{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001262 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001263 return createSub(createNullValue(T(v->getType())), v);
1264}
1265
1266Value *Nucleus::createFNeg(Value *v)
1267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001268 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001269 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1270 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001271
1272 return createFSub(negativeZero, v);
1273}
1274
1275Value *Nucleus::createNot(Value *v)
1276{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001277 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001278 if(Ice::isScalarIntegerType(v->getType()))
1279 {
1280 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1281 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001282 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001283 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001284 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001285 return createXor(v, createConstantVector(c, T(v->getType())));
1286 }
1287}
1288
1289Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001291 RR_DEBUG_INFO_UPDATE_LOC();
1292
Ben Clayton713b8d32019-12-17 20:37:56 +00001293 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -05001294 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
1295
1296 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001297 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001298
Ben Clayton713b8d32019-12-17 20:37:56 +00001299 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001300 {
1301 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001302 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001303 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001304 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001305 auto pointer = RValue<Pointer<Byte>>(ptr);
1306 Int x = *Pointer<Int>(pointer);
1307
1308 Int4 vector;
1309 vector = Insert(vector, x, 0);
1310
Antonio Maiorano02a39532020-01-21 15:15:34 -05001311 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001312 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1313 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001314 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001315 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001316 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001317 auto pointer = RValue<Pointer<Byte>>(ptr);
1318 Int x = *Pointer<Int>(pointer);
1319 Int y = *Pointer<Int>(pointer + 4);
1320
1321 Int4 vector;
1322 vector = Insert(vector, x, 0);
1323 vector = Insert(vector, y, 1);
1324
Antonio Maiorano02a39532020-01-21 15:15:34 -05001325 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001326 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1327 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001328 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001329 else
1330 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001331 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001332 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001333 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001334 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001335 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001336 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001337 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1338 load->addArg(ptr);
1339 load->addArg(::context->getConstantInt32(typeSize(type)));
1340 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001341 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001342 }
1343 else
1344 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001345 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001346 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001347
Antonio Maiorano02a39532020-01-21 15:15:34 -05001348 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001349 return V(result);
1350}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001351
Nicolas Capens157ba262019-12-10 17:49:14 -05001352Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001354 RR_DEBUG_INFO_UPDATE_LOC();
1355
Ben Clayton713b8d32019-12-17 20:37:56 +00001356 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -05001357 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
Nicolas Capens598f8d82016-09-26 15:09:10 -04001358
Ben Clayton713b8d32019-12-17 20:37:56 +00001359#if __has_feature(memory_sanitizer)
1360 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
1361 if(align != 0)
1362 {
1363 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1364 call->addArg(ptr);
1365 call->addArg(::context->getConstantInt64(typeSize(type)));
1366 ::basicBlock->appendInst(call);
1367 }
1368#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001369
Nicolas Capens157ba262019-12-10 17:49:14 -05001370 int valueType = (int)reinterpret_cast<intptr_t>(type);
1371
Ben Clayton713b8d32019-12-17 20:37:56 +00001372 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001373 {
1374 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001375 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001376 if(typeSize(type) == 4)
1377 {
1378 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1379 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1380 ::basicBlock->appendInst(bitcast);
1381
1382 RValue<Int4> v(V(vector));
1383
1384 auto pointer = RValue<Pointer<Byte>>(ptr);
1385 Int x = Extract(v, 0);
1386 *Pointer<Int>(pointer) = x;
1387 }
1388 else if(typeSize(type) == 8)
1389 {
1390 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1391 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1392 ::basicBlock->appendInst(bitcast);
1393
1394 RValue<Int4> v(V(vector));
1395
1396 auto pointer = RValue<Pointer<Byte>>(ptr);
1397 Int x = Extract(v, 0);
1398 *Pointer<Int>(pointer) = x;
1399 Int y = Extract(v, 1);
1400 *Pointer<Int>(pointer + 4) = y;
1401 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001402 else
1403 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001404 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001405 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001406 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001407 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001408 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1409 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1410 store->addArg(value);
1411 store->addArg(ptr);
1412 store->addArg(::context->getConstantInt32(typeSize(type)));
1413 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001414 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001415 }
1416 else
1417 {
1418 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001419
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001420 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001421 ::basicBlock->appendInst(store);
1422 }
1423
1424 return value;
1425}
1426
1427Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1428{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001429 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001430 ASSERT(index->getType() == Ice::IceType_i32);
1431
1432 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1433 {
1434 int32_t offset = constant->getValue() * (int)typeSize(type);
1435
1436 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001437 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001438 return ptr;
1439 }
1440
Nicolas Capens157ba262019-12-10 17:49:14 -05001441 return createAdd(ptr, createConstantInt(offset));
1442 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001443
Nicolas Capens157ba262019-12-10 17:49:14 -05001444 if(!Ice::isByteSizedType(T(type)))
1445 {
1446 index = createMul(index, createConstantInt((int)typeSize(type)));
1447 }
1448
Ben Clayton713b8d32019-12-17 20:37:56 +00001449 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001450 {
1451 if(unsignedIndex)
1452 {
1453 index = createZExt(index, T(Ice::IceType_i64));
1454 }
1455 else
1456 {
1457 index = createSExt(index, T(Ice::IceType_i64));
1458 }
1459 }
1460
1461 return createAdd(ptr, index);
1462}
1463
Antonio Maiorano370cba52019-12-31 11:36:07 -05001464static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1465{
1466 Ice::Variable *result = ::function->makeVariable(value->getType());
1467
1468 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1469 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1470 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1471 auto op = ::context->getConstantInt32(rmwOp);
1472 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1473 inst->addArg(op);
1474 inst->addArg(ptr);
1475 inst->addArg(value);
1476 inst->addArg(order);
1477 ::basicBlock->appendInst(inst);
1478
1479 return V(result);
1480}
1481
Nicolas Capens157ba262019-12-10 17:49:14 -05001482Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1483{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001484 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001485 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001486}
1487
1488Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1489{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001490 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001491 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001492}
1493
1494Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1495{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001496 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001497 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001498}
1499
1500Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1501{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001502 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001503 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001504}
1505
1506Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1507{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001508 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001509 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001510}
1511
1512Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1513{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001514 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001515 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001516}
1517
1518Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1519{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001520 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001521 Ice::Variable *result = ::function->makeVariable(value->getType());
1522
1523 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1524 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1525 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1526 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1527 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1528 inst->addArg(ptr);
1529 inst->addArg(compare);
1530 inst->addArg(value);
1531 inst->addArg(orderEq);
1532 inst->addArg(orderNeq);
1533 ::basicBlock->appendInst(inst);
1534
1535 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001536}
1537
1538static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1539{
1540 if(v->getType() == T(destType))
1541 {
1542 return v;
1543 }
1544
1545 Ice::Variable *result = ::function->makeVariable(T(destType));
1546 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1547 ::basicBlock->appendInst(cast);
1548
1549 return V(result);
1550}
1551
1552Value *Nucleus::createTrunc(Value *v, Type *destType)
1553{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001554 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001555 return createCast(Ice::InstCast::Trunc, v, destType);
1556}
1557
1558Value *Nucleus::createZExt(Value *v, Type *destType)
1559{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001560 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001561 return createCast(Ice::InstCast::Zext, v, destType);
1562}
1563
1564Value *Nucleus::createSExt(Value *v, Type *destType)
1565{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001566 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001567 return createCast(Ice::InstCast::Sext, v, destType);
1568}
1569
1570Value *Nucleus::createFPToUI(Value *v, Type *destType)
1571{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001572 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001573 return createCast(Ice::InstCast::Fptoui, v, destType);
1574}
1575
1576Value *Nucleus::createFPToSI(Value *v, Type *destType)
1577{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001578 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001579 return createCast(Ice::InstCast::Fptosi, v, destType);
1580}
1581
1582Value *Nucleus::createSIToFP(Value *v, Type *destType)
1583{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001584 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001585 return createCast(Ice::InstCast::Sitofp, v, destType);
1586}
1587
1588Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1589{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001590 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001591 return createCast(Ice::InstCast::Fptrunc, v, destType);
1592}
1593
1594Value *Nucleus::createFPExt(Value *v, Type *destType)
1595{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001596 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001597 return createCast(Ice::InstCast::Fpext, v, destType);
1598}
1599
1600Value *Nucleus::createBitCast(Value *v, Type *destType)
1601{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001602 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001603 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1604 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1605 // emulate them by writing to the stack and reading back as the destination type.
1606 if(emulateMismatchedBitCast)
1607 {
1608 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1609 {
1610 Value *address = allocateStackVariable(destType);
1611 createStore(v, address, T(v->getType()));
1612 return createLoad(address, destType);
1613 }
1614 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1615 {
1616 Value *address = allocateStackVariable(T(v->getType()));
1617 createStore(v, address, T(v->getType()));
1618 return createLoad(address, destType);
1619 }
1620 }
1621
1622 return createCast(Ice::InstCast::Bitcast, v, destType);
1623}
1624
1625static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1626{
1627 ASSERT(lhs->getType() == rhs->getType());
1628
1629 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1630 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1631 ::basicBlock->appendInst(cmp);
1632
1633 return V(result);
1634}
1635
1636Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1637{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001638 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001639 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1640}
1641
1642Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1643{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001644 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001645 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1646}
1647
1648Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1649{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001650 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001651 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1652}
1653
1654Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1655{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001656 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001657 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1658}
1659
1660Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1661{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001662 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001663 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1664}
1665
1666Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1667{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001668 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001669 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1670}
1671
1672Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1673{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001674 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001675 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1676}
1677
1678Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1679{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001680 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001681 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1682}
1683
1684Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1685{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001686 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001687 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1688}
1689
1690Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1691{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001692 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001693 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1694}
1695
1696Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1697{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001698 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001699 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1700}
1701
1702static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1703{
1704 ASSERT(lhs->getType() == rhs->getType());
1705 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1706
1707 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1708 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1709 ::basicBlock->appendInst(cmp);
1710
1711 return V(result);
1712}
1713
1714Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1715{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001716 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001717 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1718}
1719
1720Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1721{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001722 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001723 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1724}
1725
1726Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1727{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001728 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001729 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1730}
1731
1732Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1733{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001734 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001735 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1736}
1737
1738Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1739{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001740 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001741 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1742}
1743
1744Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1745{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001746 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001747 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1748}
1749
1750Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1751{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001752 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001753 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1754}
1755
1756Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1757{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001758 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001759 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1760}
1761
1762Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1763{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001764 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001765 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1766}
1767
1768Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1769{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001770 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001771 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1772}
1773
1774Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1775{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001776 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001777 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1778}
1779
1780Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1781{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001782 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001783 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1784}
1785
1786Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1787{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001788 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001789 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1790}
1791
1792Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1793{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001794 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001795 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1796}
1797
1798Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1799{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001800 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001801 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001802 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001803 ::basicBlock->appendInst(extract);
1804
1805 return V(result);
1806}
1807
1808Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1809{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001810 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001811 auto result = ::function->makeVariable(vector->getType());
1812 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1813 ::basicBlock->appendInst(insert);
1814
1815 return V(result);
1816}
1817
1818Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1819{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001820 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001821 ASSERT(V1->getType() == V2->getType());
1822
1823 int size = Ice::typeNumElements(V1->getType());
1824 auto result = ::function->makeVariable(V1->getType());
1825 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1826
1827 for(int i = 0; i < size; i++)
1828 {
1829 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1830 }
1831
1832 ::basicBlock->appendInst(shuffle);
1833
1834 return V(result);
1835}
1836
1837Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001840 ASSERT(ifTrue->getType() == ifFalse->getType());
1841
1842 auto result = ::function->makeVariable(ifTrue->getType());
1843 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1844 ::basicBlock->appendInst(select);
1845
1846 return V(result);
1847}
1848
1849SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001852 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1853 ::basicBlock->appendInst(switchInst);
1854
Ben Clayton713b8d32019-12-17 20:37:56 +00001855 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001856}
1857
1858void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1859{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001860 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001861 switchCases->addBranch(label, label, branch);
1862}
1863
1864void Nucleus::createUnreachable()
1865{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001866 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001867 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1868 ::basicBlock->appendInst(unreachable);
1869}
1870
Antonio Maiorano62427e02020-02-13 09:18:05 -05001871Type *Nucleus::getType(Value *value)
1872{
1873 return T(V(value)->getType());
1874}
1875
1876Type *Nucleus::getContainedType(Type *vectorType)
1877{
1878 Ice::Type vecTy = T(vectorType);
1879 switch(vecTy)
1880 {
1881 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1882 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1883 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1884 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1885 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1886 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1887 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1888 default:
1889 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1890 return {};
1891 }
1892}
1893
Nicolas Capens157ba262019-12-10 17:49:14 -05001894Type *Nucleus::getPointerType(Type *ElementType)
1895{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001896 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001897}
1898
Antonio Maiorano62427e02020-02-13 09:18:05 -05001899static constexpr Ice::Type getNaturalIntType()
1900{
1901 constexpr size_t intSize = sizeof(int);
1902 static_assert(intSize == 4 || intSize == 8, "");
1903 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1904}
1905
1906Type *Nucleus::getPrintfStorageType(Type *valueType)
1907{
1908 Ice::Type valueTy = T(valueType);
1909 switch(valueTy)
1910 {
1911 case Ice::IceType_i32:
1912 return T(getNaturalIntType());
1913
1914 case Ice::IceType_f32:
1915 return T(Ice::IceType_f64);
1916
1917 default:
1918 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1919 return {};
1920 }
1921}
1922
Nicolas Capens157ba262019-12-10 17:49:14 -05001923Value *Nucleus::createNullValue(Type *Ty)
1924{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001925 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001926 if(Ice::isVectorType(T(Ty)))
1927 {
1928 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001929 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001930 return createConstantVector(c, Ty);
1931 }
1932 else
1933 {
1934 return V(::context->getConstantZero(T(Ty)));
1935 }
1936}
1937
1938Value *Nucleus::createConstantLong(int64_t i)
1939{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001940 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001941 return V(::context->getConstantInt64(i));
1942}
1943
1944Value *Nucleus::createConstantInt(int i)
1945{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001946 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001947 return V(::context->getConstantInt32(i));
1948}
1949
1950Value *Nucleus::createConstantInt(unsigned int i)
1951{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001952 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001953 return V(::context->getConstantInt32(i));
1954}
1955
1956Value *Nucleus::createConstantBool(bool b)
1957{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001958 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001959 return V(::context->getConstantInt1(b));
1960}
1961
1962Value *Nucleus::createConstantByte(signed char i)
1963{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001964 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001965 return V(::context->getConstantInt8(i));
1966}
1967
1968Value *Nucleus::createConstantByte(unsigned char i)
1969{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001970 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001971 return V(::context->getConstantInt8(i));
1972}
1973
1974Value *Nucleus::createConstantShort(short i)
1975{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001976 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001977 return V(::context->getConstantInt16(i));
1978}
1979
1980Value *Nucleus::createConstantShort(unsigned short i)
1981{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001982 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001983 return V(::context->getConstantInt16(i));
1984}
1985
1986Value *Nucleus::createConstantFloat(float x)
1987{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001988 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001989 return V(::context->getConstantFloat(x));
1990}
1991
1992Value *Nucleus::createNullPointer(Type *Ty)
1993{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001994 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001995 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05001996}
1997
Antonio Maiorano02a39532020-01-21 15:15:34 -05001998static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
1999{
2000 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2001}
2002
Nicolas Capens157ba262019-12-10 17:49:14 -05002003Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2004{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002005 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002006 const int vectorSize = 16;
2007 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2008 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002009
2010 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002011 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002012
2013 // TODO(148082873): Fix global variable constants when generating multiple functions
2014 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002015
2016 switch((int)reinterpret_cast<intptr_t>(type))
2017 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002018 case Ice::IceType_v4i32:
2019 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002020 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002021 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002022 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002023 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002024 }
2025 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002026 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002027 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002028 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002029 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002030 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002031 }
2032 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002033 case Ice::IceType_v8i16:
2034 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002035 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002036 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002037 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002038 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002039 }
2040 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002041 case Ice::IceType_v16i8:
2042 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002043 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002044 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002045 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002046 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002047 }
2048 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002049 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002050 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002051 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002052 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002053 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002054 }
2055 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002056 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002057 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002058 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002059 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002060 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002061 }
2062 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002063 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002064 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002065 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002066 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002067 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002068 }
2069 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002070 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002071 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002072 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002073 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002074 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002075 }
2076 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002077 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002078 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002079 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002080 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002081 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002082 }
2083 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002084 default:
2085 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002086 }
2087
Antonio Maiorano02a39532020-01-21 15:15:34 -05002088 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002089
Antonio Maiorano02a39532020-01-21 15:15:34 -05002090 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002091 return V(result);
2092}
2093
2094Value *Nucleus::createConstantVector(const double *constants, Type *type)
2095{
Ben Clayton713b8d32019-12-17 20:37:56 +00002096 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002097}
2098
Antonio Maiorano62427e02020-02-13 09:18:05 -05002099Value *Nucleus::createConstantString(const char *v)
2100{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002101 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002102 return V(IceConstantData(v, strlen(v) + 1));
2103}
2104
Nicolas Capens157ba262019-12-10 17:49:14 -05002105Type *Void::getType()
2106{
2107 return T(Ice::IceType_void);
2108}
2109
2110Type *Bool::getType()
2111{
2112 return T(Ice::IceType_i1);
2113}
2114
2115Type *Byte::getType()
2116{
2117 return T(Ice::IceType_i8);
2118}
2119
2120Type *SByte::getType()
2121{
2122 return T(Ice::IceType_i8);
2123}
2124
2125Type *Short::getType()
2126{
2127 return T(Ice::IceType_i16);
2128}
2129
2130Type *UShort::getType()
2131{
2132 return T(Ice::IceType_i16);
2133}
2134
2135Type *Byte4::getType()
2136{
2137 return T(Type_v4i8);
2138}
2139
2140Type *SByte4::getType()
2141{
2142 return T(Type_v4i8);
2143}
2144
Ben Clayton713b8d32019-12-17 20:37:56 +00002145namespace {
2146RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002147{
Ben Clayton713b8d32019-12-17 20:37:56 +00002148 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002149}
2150
Ben Clayton713b8d32019-12-17 20:37:56 +00002151RValue<Byte> Extract(RValue<Byte8> val, int i)
2152{
2153 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
2154}
2155
2156RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2157{
2158 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
2159}
2160} // namespace
2161
Nicolas Capens157ba262019-12-10 17:49:14 -05002162RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2163{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002164 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002165 if(emulateIntrinsics)
2166 {
2167 Byte8 result;
2168 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2169 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2170 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2171 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2172 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2173 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2174 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2175 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2176
2177 return result;
2178 }
2179 else
2180 {
2181 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002182 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002183 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2184 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2185 paddusb->addArg(x.value);
2186 paddusb->addArg(y.value);
2187 ::basicBlock->appendInst(paddusb);
2188
2189 return RValue<Byte8>(V(result));
2190 }
2191}
2192
2193RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2194{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002195 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002196 if(emulateIntrinsics)
2197 {
2198 Byte8 result;
2199 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2200 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2201 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2202 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2203 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2204 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2205 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2206 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2207
2208 return result;
2209 }
2210 else
2211 {
2212 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002213 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002214 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2215 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2216 psubusw->addArg(x.value);
2217 psubusw->addArg(y.value);
2218 ::basicBlock->appendInst(psubusw);
2219
2220 return RValue<Byte8>(V(result));
2221 }
2222}
2223
2224RValue<SByte> Extract(RValue<SByte8> val, int i)
2225{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002226 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002227 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
2228}
2229
2230RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2231{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002232 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002233 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2234}
2235
2236RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2237{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002238 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002239 if(emulateIntrinsics)
2240 {
2241 SByte8 result;
2242 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2243 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2244 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2245 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2246 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2247 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2248 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2249 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2250
2251 return result;
2252 }
2253 else
2254 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002255#if defined(__i386__) || defined(__x86_64__)
2256 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2257 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2258 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002259
Ben Clayton713b8d32019-12-17 20:37:56 +00002260 return As<SByte8>(hi | lo);
2261#else
2262 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2263#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002264 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002265}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002266
Nicolas Capens157ba262019-12-10 17:49:14 -05002267RValue<Int> SignMask(RValue<Byte8> x)
2268{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002269 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002270 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002271 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002272 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2273 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002274 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002275 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002276 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002277 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002278 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002279 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2280 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2281 movmsk->addArg(x.value);
2282 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002283
Nicolas Capens157ba262019-12-10 17:49:14 -05002284 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002285 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002286}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002287
//	RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
//	{
//		return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
//	}
2292
Nicolas Capens157ba262019-12-10 17:49:14 -05002293RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2294{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002295 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002296 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2297}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002298
Nicolas Capens157ba262019-12-10 17:49:14 -05002299Type *Byte8::getType()
2300{
2301 return T(Type_v8i8);
2302}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002303
//	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
//	{
//		return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
//	}

//	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
//	{
//		return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
//	}
2313
Nicolas Capens157ba262019-12-10 17:49:14 -05002314RValue<SByte> SaturateSigned(RValue<Short> x)
2315{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002316 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002317 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2318}
2319
2320RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2321{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002322 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002323 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002324 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002325 SByte8 result;
2326 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2327 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2328 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2329 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2330 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2331 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2332 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2333 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002334
Nicolas Capens157ba262019-12-10 17:49:14 -05002335 return result;
2336 }
2337 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002338 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002339 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002340 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002341 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2342 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2343 paddsb->addArg(x.value);
2344 paddsb->addArg(y.value);
2345 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002346
Nicolas Capens157ba262019-12-10 17:49:14 -05002347 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002348 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002349}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002350
Nicolas Capens157ba262019-12-10 17:49:14 -05002351RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2352{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002353 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002354 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002355 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002356 SByte8 result;
2357 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2358 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2359 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2360 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2361 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2362 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2363 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2364 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002365
Nicolas Capens157ba262019-12-10 17:49:14 -05002366 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002367 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002368 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002369 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002370 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002371 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002372 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2373 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2374 psubsb->addArg(x.value);
2375 psubsb->addArg(y.value);
2376 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002377
Nicolas Capens157ba262019-12-10 17:49:14 -05002378 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002379 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002380}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002381
Nicolas Capens157ba262019-12-10 17:49:14 -05002382RValue<Int> SignMask(RValue<SByte8> x)
2383{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002384 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002385 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002386 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002387 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2388 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002389 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002390 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002391 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002392 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002393 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002394 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2395 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2396 movmsk->addArg(x.value);
2397 ::basicBlock->appendInst(movmsk);
2398
2399 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002400 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002401}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002402
Nicolas Capens157ba262019-12-10 17:49:14 -05002403RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2404{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002405 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002406 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2407}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002408
Nicolas Capens157ba262019-12-10 17:49:14 -05002409RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2410{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002411 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002412 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2413}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002414
Nicolas Capens157ba262019-12-10 17:49:14 -05002415Type *SByte8::getType()
2416{
2417 return T(Type_v8i8);
2418}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002419
Nicolas Capens157ba262019-12-10 17:49:14 -05002420Type *Byte16::getType()
2421{
2422 return T(Ice::IceType_v16i8);
2423}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002424
Nicolas Capens157ba262019-12-10 17:49:14 -05002425Type *SByte16::getType()
2426{
2427 return T(Ice::IceType_v16i8);
2428}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002429
Nicolas Capens157ba262019-12-10 17:49:14 -05002430Type *Short2::getType()
2431{
2432 return T(Type_v2i16);
2433}
Nicolas Capensd4227962016-11-09 14:24:25 -05002434
Nicolas Capens157ba262019-12-10 17:49:14 -05002435Type *UShort2::getType()
2436{
2437 return T(Type_v2i16);
2438}
Nicolas Capensd4227962016-11-09 14:24:25 -05002439
Nicolas Capens157ba262019-12-10 17:49:14 -05002440Short4::Short4(RValue<Int4> cast)
2441{
Ben Clayton713b8d32019-12-17 20:37:56 +00002442 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002443 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2444 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2445
2446 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
2447 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2448
2449 storeValue(short4);
2450}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002451
//	Short4::Short4(RValue<Float> cast)
//	{
//	}
2455
Nicolas Capens157ba262019-12-10 17:49:14 -05002456Short4::Short4(RValue<Float4> cast)
2457{
Ben Claytonce54c592020-02-07 11:30:51 +00002458 UNIMPLEMENTED_NO_BUG("Short4::Short4(RValue<Float4> cast)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002459}
2460
2461RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2462{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002463 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002464 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002465 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002466 Short4 result;
2467 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2468 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2469 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2470 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002471
2472 return result;
2473 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002474 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002475 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002476 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2477 }
2478}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002479
Nicolas Capens157ba262019-12-10 17:49:14 -05002480RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2481{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002482 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002483 if(emulateIntrinsics)
2484 {
2485 Short4 result;
2486 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2487 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2488 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2489 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2490
2491 return result;
2492 }
2493 else
2494 {
2495 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2496 }
2497}
2498
2499RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2500{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002501 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002502 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2503 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2504 ::basicBlock->appendInst(cmp);
2505
2506 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2507 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2508 ::basicBlock->appendInst(select);
2509
2510 return RValue<Short4>(V(result));
2511}
2512
2513RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2514{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002515 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002516 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2517 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2518 ::basicBlock->appendInst(cmp);
2519
2520 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2521 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2522 ::basicBlock->appendInst(select);
2523
2524 return RValue<Short4>(V(result));
2525}
2526
2527RValue<Short> SaturateSigned(RValue<Int> x)
2528{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002529 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002530 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2531}
2532
2533RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2534{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002535 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002536 if(emulateIntrinsics)
2537 {
2538 Short4 result;
2539 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2540 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2541 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2542 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2543
2544 return result;
2545 }
2546 else
2547 {
2548 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002549 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002550 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2551 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2552 paddsw->addArg(x.value);
2553 paddsw->addArg(y.value);
2554 ::basicBlock->appendInst(paddsw);
2555
2556 return RValue<Short4>(V(result));
2557 }
2558}
2559
2560RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2561{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002562 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002563 if(emulateIntrinsics)
2564 {
2565 Short4 result;
2566 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2567 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2568 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2569 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2570
2571 return result;
2572 }
2573 else
2574 {
2575 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002576 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002577 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2578 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2579 psubsw->addArg(x.value);
2580 psubsw->addArg(y.value);
2581 ::basicBlock->appendInst(psubsw);
2582
2583 return RValue<Short4>(V(result));
2584 }
2585}
2586
2587RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2588{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002589 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002590 if(emulateIntrinsics)
2591 {
2592 Short4 result;
2593 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2594 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2595 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2596 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2597
2598 return result;
2599 }
2600 else
2601 {
2602 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002603 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002604 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2605 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2606 pmulhw->addArg(x.value);
2607 pmulhw->addArg(y.value);
2608 ::basicBlock->appendInst(pmulhw);
2609
2610 return RValue<Short4>(V(result));
2611 }
2612}
2613
2614RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2615{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002616 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002617 if(emulateIntrinsics)
2618 {
2619 Int2 result;
2620 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2621 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2622
2623 return result;
2624 }
2625 else
2626 {
2627 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002628 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002629 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2630 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2631 pmaddwd->addArg(x.value);
2632 pmaddwd->addArg(y.value);
2633 ::basicBlock->appendInst(pmaddwd);
2634
2635 return As<Int2>(V(result));
2636 }
2637}
2638
2639RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2640{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002641 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002642 if(emulateIntrinsics)
2643 {
2644 SByte8 result;
2645 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2646 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2647 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2648 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2649 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2650 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2651 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2652 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2653
2654 return result;
2655 }
2656 else
2657 {
2658 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002659 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002660 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2661 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2662 pack->addArg(x.value);
2663 pack->addArg(y.value);
2664 ::basicBlock->appendInst(pack);
2665
2666 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2667 }
2668}
2669
2670RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2671{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002672 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002673 if(emulateIntrinsics)
2674 {
2675 Byte8 result;
2676 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2677 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2678 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2679 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2680 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2681 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2682 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2683 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2684
2685 return result;
2686 }
2687 else
2688 {
2689 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002690 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002691 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2692 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2693 pack->addArg(x.value);
2694 pack->addArg(y.value);
2695 ::basicBlock->appendInst(pack);
2696
2697 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2698 }
2699}
2700
2701RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2702{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002703 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002704 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2705}
2706
2707RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2708{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002709 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002710 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2711}
2712
2713Type *Short4::getType()
2714{
2715 return T(Type_v4i16);
2716}
2717
2718UShort4::UShort4(RValue<Float4> cast, bool saturate)
2719{
2720 if(saturate)
2721 {
2722 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002723 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002724 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2725 // PackUnsigned takes care of 0x0000 saturation.
2726 Int4 int4(Min(cast, Float4(0xFFFF)));
2727 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002728 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002729 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002730 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002731 // ARM saturates the 32-bit integer result on overflow/undeflow.
2732 Int4 int4(cast);
2733 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002734 }
2735 else
2736 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002737 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002738 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002739 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002740 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002741 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002742 *this = Short4(Int4(cast));
2743 }
2744}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002745
Nicolas Capens157ba262019-12-10 17:49:14 -05002746RValue<UShort> Extract(RValue<UShort4> val, int i)
2747{
2748 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2749}
2750
2751RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2752{
2753 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2754}
2755
2756RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2757{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002758 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002759 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002760
Nicolas Capens157ba262019-12-10 17:49:14 -05002761 {
2762 UShort4 result;
2763 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2764 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2765 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2766 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2767
2768 return result;
2769 }
2770 else
2771 {
2772 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2773 }
2774}
2775
2776RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2777{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002778 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002779 if(emulateIntrinsics)
2780 {
2781 UShort4 result;
2782 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2783 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2784 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2785 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2786
2787 return result;
2788 }
2789 else
2790 {
2791 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2792 }
2793}
2794
2795RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002798 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2799 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2800 ::basicBlock->appendInst(cmp);
2801
2802 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2803 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2804 ::basicBlock->appendInst(select);
2805
2806 return RValue<UShort4>(V(result));
2807}
2808
2809RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2810{
2811 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2812 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2813 ::basicBlock->appendInst(cmp);
2814
2815 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2816 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2817 ::basicBlock->appendInst(select);
2818
2819 return RValue<UShort4>(V(result));
2820}
2821
2822RValue<UShort> SaturateUnsigned(RValue<Int> x)
2823{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002824 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002825 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2826}
2827
2828RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2829{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002830 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002831 if(emulateIntrinsics)
2832 {
2833 UShort4 result;
2834 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2835 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2836 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2837 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2838
2839 return result;
2840 }
2841 else
2842 {
2843 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002844 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002845 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2846 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2847 paddusw->addArg(x.value);
2848 paddusw->addArg(y.value);
2849 ::basicBlock->appendInst(paddusw);
2850
2851 return RValue<UShort4>(V(result));
2852 }
2853}
2854
2855RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002858 if(emulateIntrinsics)
2859 {
2860 UShort4 result;
2861 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2862 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2863 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2864 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2865
2866 return result;
2867 }
2868 else
2869 {
2870 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002871 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002872 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2873 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2874 psubusw->addArg(x.value);
2875 psubusw->addArg(y.value);
2876 ::basicBlock->appendInst(psubusw);
2877
2878 return RValue<UShort4>(V(result));
2879 }
2880}
2881
2882RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2883{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002884 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002885 if(emulateIntrinsics)
2886 {
2887 UShort4 result;
2888 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2889 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2890 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2891 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2892
2893 return result;
2894 }
2895 else
2896 {
2897 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002898 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002899 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2900 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2901 pmulhuw->addArg(x.value);
2902 pmulhuw->addArg(y.value);
2903 ::basicBlock->appendInst(pmulhuw);
2904
2905 return RValue<UShort4>(V(result));
2906 }
2907}
2908
2909RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2910{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002911 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002912 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2913
2914 // Scalarized implementation.
2915 Int4 result;
2916 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2917 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2918 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2919 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2920
2921 return result;
2922}
2923
2924RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2925{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002926 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002927 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2928
2929 if(false) // Partial product based implementation.
2930 {
2931 auto xh = x >> 16;
2932 auto yh = y >> 16;
2933 auto xl = x & UInt4(0x0000FFFF);
2934 auto yl = y & UInt4(0x0000FFFF);
2935 auto xlyh = xl * yh;
2936 auto xhyl = xh * yl;
2937 auto xlyhh = xlyh >> 16;
2938 auto xhylh = xhyl >> 16;
2939 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2940 auto xhyll = xhyl & UInt4(0x0000FFFF);
2941 auto xlylh = (xl * yl) >> 16;
2942 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2943
2944 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002945 }
2946
Nicolas Capens157ba262019-12-10 17:49:14 -05002947 // Scalarized implementation.
2948 Int4 result;
2949 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
2950 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
2951 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
2952 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
2953
2954 return As<UInt4>(result);
2955}
2956
2957RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2958{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002959 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00002960 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002961 return UShort4(0);
2962}
2963
2964Type *UShort4::getType()
2965{
2966 return T(Type_v4i16);
2967}
2968
2969RValue<Short> Extract(RValue<Short8> val, int i)
2970{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002971 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002972 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2973}
2974
2975RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2976{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002977 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002978 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2979}
2980
2981RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2982{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002983 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002984 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002985 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002986 Short8 result;
2987 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2988 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2989 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2990 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
2991 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
2992 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
2993 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
2994 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002995
Nicolas Capens157ba262019-12-10 17:49:14 -05002996 return result;
2997 }
2998 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002999 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003000 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003001 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003002}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003003
Nicolas Capens157ba262019-12-10 17:49:14 -05003004RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3005{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003006 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003007 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003008 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003009 Short8 result;
3010 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3011 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3012 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3013 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3014 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3015 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3016 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3017 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003018
Nicolas Capens157ba262019-12-10 17:49:14 -05003019 return result;
3020 }
3021 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003022 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003023 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003024 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003025}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003026
Nicolas Capens157ba262019-12-10 17:49:14 -05003027RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3028{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003029 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003030 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003031 return Int4(0);
3032}
3033
3034RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3035{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003036 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003037 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003038 return Short8(0);
3039}
3040
3041Type *Short8::getType()
3042{
3043 return T(Ice::IceType_v8i16);
3044}
3045
3046RValue<UShort> Extract(RValue<UShort8> val, int i)
3047{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003048 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003049 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
3050}
3051
3052RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3053{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003054 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003055 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
3056}
3057
3058RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3059{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003060 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003061 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003062 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003063 UShort8 result;
3064 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3065 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3066 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3067 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3068 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3069 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3070 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3071 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003072
Nicolas Capens157ba262019-12-10 17:49:14 -05003073 return result;
3074 }
3075 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003076 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003077 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003078 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003079}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003080
Nicolas Capens157ba262019-12-10 17:49:14 -05003081RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3082{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003083 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003084 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003085 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003086 UShort8 result;
3087 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3088 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3089 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3090 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3091 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3092 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3093 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3094 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003095
Nicolas Capens157ba262019-12-10 17:49:14 -05003096 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003097 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003098 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003099 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003100 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003101 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003102}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003103
Nicolas Capens157ba262019-12-10 17:49:14 -05003104RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3105{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003106 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003107 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003108 return UShort8(0);
3109}
3110
Nicolas Capens157ba262019-12-10 17:49:14 -05003111Type *UShort8::getType()
3112{
3113 return T(Ice::IceType_v8i16);
3114}
3115
Ben Clayton713b8d32019-12-17 20:37:56 +00003116RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003117{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003118 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003119 RValue<Int> res = val;
3120 val += 1;
3121 return res;
3122}
3123
Ben Clayton713b8d32019-12-17 20:37:56 +00003124const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003125{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003126 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003127 val += 1;
3128 return val;
3129}
3130
Ben Clayton713b8d32019-12-17 20:37:56 +00003131RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003132{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003133 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003134 RValue<Int> res = val;
3135 val -= 1;
3136 return res;
3137}
3138
Ben Clayton713b8d32019-12-17 20:37:56 +00003139const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003140{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003141 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003142 val -= 1;
3143 return val;
3144}
3145
3146RValue<Int> RoundInt(RValue<Float> cast)
3147{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003148 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003149 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003150 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003151 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3152 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003153 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003154 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003155 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003156 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003157 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003158 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3159 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3160 nearbyint->addArg(cast.value);
3161 ::basicBlock->appendInst(nearbyint);
3162
3163 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003164 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003165}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003166
Nicolas Capens157ba262019-12-10 17:49:14 -05003167Type *Int::getType()
3168{
3169 return T(Ice::IceType_i32);
3170}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003171
Nicolas Capens157ba262019-12-10 17:49:14 -05003172Type *Long::getType()
3173{
3174 return T(Ice::IceType_i64);
3175}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003176
Nicolas Capens157ba262019-12-10 17:49:14 -05003177UInt::UInt(RValue<Float> cast)
3178{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003179 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003180 // Smallest positive value representable in UInt, but not in Int
3181 const unsigned int ustart = 0x80000000u;
3182 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003183
Nicolas Capens157ba262019-12-10 17:49:14 -05003184 // If the value is negative, store 0, otherwise store the result of the conversion
3185 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003186 // Check if the value can be represented as an Int
3187 IfThenElse(cast >= ustartf,
3188 // If the value is too large, subtract ustart and re-add it after conversion.
3189 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3190 // Otherwise, just convert normally
3191 Int(cast)))
3192 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05003193}
Nicolas Capensa8086512016-11-07 17:32:17 -05003194
Ben Clayton713b8d32019-12-17 20:37:56 +00003195RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003196{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003197 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003198 RValue<UInt> res = val;
3199 val += 1;
3200 return res;
3201}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003202
Ben Clayton713b8d32019-12-17 20:37:56 +00003203const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003204{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003205 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003206 val += 1;
3207 return val;
3208}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003209
Ben Clayton713b8d32019-12-17 20:37:56 +00003210RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003211{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003212 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003213 RValue<UInt> res = val;
3214 val -= 1;
3215 return res;
3216}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003217
Ben Clayton713b8d32019-12-17 20:37:56 +00003218const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003219{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003220 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003221 val -= 1;
3222 return val;
3223}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003224
Nicolas Capens598f8d82016-09-26 15:09:10 -04003225// RValue<UInt> RoundUInt(RValue<Float> cast)
3226// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003227// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003228// }
3229
Nicolas Capens157ba262019-12-10 17:49:14 -05003230Type *UInt::getType()
3231{
3232 return T(Ice::IceType_i32);
3233}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003234
3235// Int2::Int2(RValue<Int> cast)
3236// {
3237// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
3238// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
3239//
3240// Constant *shuffle[2];
3241// shuffle[0] = Nucleus::createConstantInt(0);
3242// shuffle[1] = Nucleus::createConstantInt(0);
3243//
3244// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
3245//
3246// storeValue(replicate);
3247// }
3248
Nicolas Capens157ba262019-12-10 17:49:14 -05003249RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3250{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003251 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003252 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003253 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003254 Int2 result;
3255 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3256 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003257
Nicolas Capens157ba262019-12-10 17:49:14 -05003258 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003259 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003260 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003261 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003262 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003263 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003264}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003265
Nicolas Capens157ba262019-12-10 17:49:14 -05003266RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003268 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003269 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003270 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003271 Int2 result;
3272 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3273 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3274
3275 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003276 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003277 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003278 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003279 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003280 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003281}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003282
Nicolas Capens157ba262019-12-10 17:49:14 -05003283Type *Int2::getType()
3284{
3285 return T(Type_v2i32);
3286}
3287
3288RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3289{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003290 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003291 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003292 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003293 UInt2 result;
3294 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3295 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003296
Nicolas Capens157ba262019-12-10 17:49:14 -05003297 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003298 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003299 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003300 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003301 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003302 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003303}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003304
Nicolas Capens157ba262019-12-10 17:49:14 -05003305RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3306{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003307 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003308 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003309 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003310 UInt2 result;
3311 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3312 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003313
Nicolas Capens157ba262019-12-10 17:49:14 -05003314 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003315 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003316 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003317 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003318 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003319 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003320}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003321
Nicolas Capens157ba262019-12-10 17:49:14 -05003322Type *UInt2::getType()
3323{
3324 return T(Type_v2i32);
3325}
3326
Ben Clayton713b8d32019-12-17 20:37:56 +00003327Int4::Int4(RValue<Byte4> cast)
3328 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003331 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3332 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3333
3334 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003335 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003336 Value *b = Nucleus::createBitCast(a, Byte16::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003337 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003338
Ben Clayton713b8d32019-12-17 20:37:56 +00003339 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003340 Value *d = Nucleus::createBitCast(c, Short8::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003341 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003342
3343 Value *f = Nucleus::createBitCast(e, Int4::getType());
3344 storeValue(f);
3345}
3346
Ben Clayton713b8d32019-12-17 20:37:56 +00003347Int4::Int4(RValue<SByte4> cast)
3348 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003349{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003350 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003351 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3352 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3353
Ben Clayton713b8d32019-12-17 20:37:56 +00003354 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003355 Value *b = Nucleus::createBitCast(a, Byte16::getType());
3356 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3357
Ben Clayton713b8d32019-12-17 20:37:56 +00003358 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003359 Value *d = Nucleus::createBitCast(c, Short8::getType());
3360 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3361
3362 *this = As<Int4>(e) >> 24;
3363}
3364
Ben Clayton713b8d32019-12-17 20:37:56 +00003365Int4::Int4(RValue<Short4> cast)
3366 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003367{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003368 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003369 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003370 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3371
3372 *this = As<Int4>(c) >> 16;
3373}
3374
Ben Clayton713b8d32019-12-17 20:37:56 +00003375Int4::Int4(RValue<UShort4> cast)
3376 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003377{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003378 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003379 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003380 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
3381 Value *d = Nucleus::createBitCast(c, Int4::getType());
3382 storeValue(d);
3383}
3384
Ben Clayton713b8d32019-12-17 20:37:56 +00003385Int4::Int4(RValue<Int> rhs)
3386 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003387{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003388 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003389 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
3390
Ben Clayton713b8d32019-12-17 20:37:56 +00003391 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003392 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3393
3394 storeValue(replicate);
3395}
3396
3397RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3398{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003399 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003400 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003401 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003402 Int4 result;
3403 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3404 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3405 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3406 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003407
Nicolas Capens157ba262019-12-10 17:49:14 -05003408 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003409 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003410 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003411 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003412 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003413 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003414}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003415
Nicolas Capens157ba262019-12-10 17:49:14 -05003416RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003418 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003419 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003420 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003421 Int4 result;
3422 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3423 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3424 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3425 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003426
Nicolas Capens157ba262019-12-10 17:49:14 -05003427 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003428 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003429 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003430 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003431 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003432 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003433}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003434
Nicolas Capens157ba262019-12-10 17:49:14 -05003435RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3436{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003437 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003438 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
3439}
3440
3441RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3442{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003443 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003444 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
3445}
3446
3447RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3448{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003449 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003450 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
3451}
3452
3453RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3454{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003455 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003456 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
3457}
3458
3459RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3460{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003461 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003462 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
3463}
3464
3465RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3466{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003467 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003468 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
3469}
3470
3471RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3472{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003473 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003474 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3475 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3476 ::basicBlock->appendInst(cmp);
3477
3478 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3479 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3480 ::basicBlock->appendInst(select);
3481
3482 return RValue<Int4>(V(result));
3483}
3484
3485RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3486{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003487 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003488 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3489 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3490 ::basicBlock->appendInst(cmp);
3491
3492 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3493 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3494 ::basicBlock->appendInst(select);
3495
3496 return RValue<Int4>(V(result));
3497}
3498
3499RValue<Int4> RoundInt(RValue<Float4> cast)
3500{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003501 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003502 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003503 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003504 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3505 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003506 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003507 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003508 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003509 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003510 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003511 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3512 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3513 nearbyint->addArg(cast.value);
3514 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003515
3516 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003517 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003518}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003519
Nicolas Capens157ba262019-12-10 17:49:14 -05003520RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3521{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003522 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003523 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003524 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003525 Short8 result;
3526 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3527 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3528 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3529 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3530 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3531 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3532 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3533 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003534
Nicolas Capens157ba262019-12-10 17:49:14 -05003535 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003536 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003537 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003538 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003539 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003540 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003541 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3542 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3543 pack->addArg(x.value);
3544 pack->addArg(y.value);
3545 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003546
Nicolas Capens157ba262019-12-10 17:49:14 -05003547 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003548 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003549}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003550
Nicolas Capens157ba262019-12-10 17:49:14 -05003551RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3552{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003553 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003554 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003555 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003556 RValue<Int4> sx = As<Int4>(x);
3557 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003558
Nicolas Capens157ba262019-12-10 17:49:14 -05003559 RValue<Int4> sy = As<Int4>(y);
3560 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003561
Nicolas Capens157ba262019-12-10 17:49:14 -05003562 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003563 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003564 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003565 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003566 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003567 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003568 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3569 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3570 pack->addArg(x.value);
3571 pack->addArg(y.value);
3572 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003573
Nicolas Capens157ba262019-12-10 17:49:14 -05003574 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003575 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003576}
Nicolas Capens33438a62017-09-27 11:47:35 -04003577
Nicolas Capens157ba262019-12-10 17:49:14 -05003578RValue<Int> SignMask(RValue<Int4> x)
3579{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003580 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003581 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003582 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003583 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3584 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003585 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003586 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003587 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003588 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003589 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003590 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3591 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3592 movmsk->addArg(x.value);
3593 ::basicBlock->appendInst(movmsk);
3594
3595 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003596 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003597}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003598
Nicolas Capens157ba262019-12-10 17:49:14 -05003599Type *Int4::getType()
3600{
3601 return T(Ice::IceType_v4i32);
3602}
3603
Ben Clayton713b8d32019-12-17 20:37:56 +00003604UInt4::UInt4(RValue<Float4> cast)
3605 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003606{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003607 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003608 // Smallest positive value representable in UInt, but not in Int
3609 const unsigned int ustart = 0x80000000u;
3610 const float ustartf = float(ustart);
3611
3612 // Check if the value can be represented as an Int
3613 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3614 // If the value is too large, subtract ustart and re-add it after conversion.
3615 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003616 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003617 (~uiValue & Int4(cast));
3618 // If the value is negative, store 0, otherwise store the result of the conversion
3619 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3620}
3621
Ben Clayton713b8d32019-12-17 20:37:56 +00003622UInt4::UInt4(RValue<UInt> rhs)
3623 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003624{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003625 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003626 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3627
Ben Clayton713b8d32019-12-17 20:37:56 +00003628 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003629 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3630
3631 storeValue(replicate);
3632}
3633
3634RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3635{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003636 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003637 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003638 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003639 UInt4 result;
3640 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3641 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3642 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3643 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003644
Nicolas Capens157ba262019-12-10 17:49:14 -05003645 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003646 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003647 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003648 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003649 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003650 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003651}
Ben Clayton88816fa2019-05-15 17:08:14 +01003652
Nicolas Capens157ba262019-12-10 17:49:14 -05003653RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3654{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003655 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003656 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003657 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003658 UInt4 result;
3659 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3660 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3661 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3662 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003663
Nicolas Capens157ba262019-12-10 17:49:14 -05003664 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003665 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003666 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003667 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003668 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003669 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003670}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003671
Nicolas Capens157ba262019-12-10 17:49:14 -05003672RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3673{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003674 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003675 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3676}
3677
3678RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3679{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003680 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003681 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3682}
3683
3684RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3685{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003686 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003687 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3688}
3689
3690RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3691{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003692 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003693 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3694}
3695
3696RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3697{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003698 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003699 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3700}
3701
3702RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3703{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003704 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003705 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3706}
3707
3708RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3709{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003710 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003711 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3712 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3713 ::basicBlock->appendInst(cmp);
3714
3715 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3716 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3717 ::basicBlock->appendInst(select);
3718
3719 return RValue<UInt4>(V(result));
3720}
3721
3722RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3723{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003724 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003725 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3726 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3727 ::basicBlock->appendInst(cmp);
3728
3729 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3730 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3731 ::basicBlock->appendInst(select);
3732
3733 return RValue<UInt4>(V(result));
3734}
3735
3736Type *UInt4::getType()
3737{
3738 return T(Ice::IceType_v4i32);
3739}
3740
3741Type *Half::getType()
3742{
3743 return T(Ice::IceType_i16);
3744}
3745
3746RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3747{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003748 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003749 return 1.0f / x;
3750}
3751
3752RValue<Float> RcpSqrt_pp(RValue<Float> x)
3753{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003754 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003755 return Rcp_pp(Sqrt(x));
3756}
3757
3758RValue<Float> Sqrt(RValue<Float> x)
3759{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003760 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003761 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003762 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003763 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3764 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3765 sqrt->addArg(x.value);
3766 ::basicBlock->appendInst(sqrt);
3767
3768 return RValue<Float>(V(result));
3769}
3770
3771RValue<Float> Round(RValue<Float> x)
3772{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003773 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003774 return Float4(Round(Float4(x))).x;
3775}
3776
3777RValue<Float> Trunc(RValue<Float> x)
3778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003780 return Float4(Trunc(Float4(x))).x;
3781}
3782
3783RValue<Float> Frac(RValue<Float> x)
3784{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003785 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003786 return Float4(Frac(Float4(x))).x;
3787}
3788
3789RValue<Float> Floor(RValue<Float> x)
3790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003792 return Float4(Floor(Float4(x))).x;
3793}
3794
3795RValue<Float> Ceil(RValue<Float> x)
3796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003798 return Float4(Ceil(Float4(x))).x;
3799}
3800
3801Type *Float::getType()
3802{
3803 return T(Ice::IceType_f32);
3804}
3805
3806Type *Float2::getType()
3807{
3808 return T(Type_v2f32);
3809}
3810
Ben Clayton713b8d32019-12-17 20:37:56 +00003811Float4::Float4(RValue<Float> rhs)
3812 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003813{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003814 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003815 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3816
Ben Clayton713b8d32019-12-17 20:37:56 +00003817 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003818 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3819
3820 storeValue(replicate);
3821}
3822
3823RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3824{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003825 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003826 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3827 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3828 ::basicBlock->appendInst(cmp);
3829
3830 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3831 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3832 ::basicBlock->appendInst(select);
3833
3834 return RValue<Float4>(V(result));
3835}
3836
3837RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003840 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3841 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3842 ::basicBlock->appendInst(cmp);
3843
3844 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3845 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3846 ::basicBlock->appendInst(select);
3847
3848 return RValue<Float4>(V(result));
3849}
3850
3851RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003854 return Float4(1.0f) / x;
3855}
3856
3857RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3858{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003859 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003860 return Rcp_pp(Sqrt(x));
3861}
3862
3863RValue<Float4> Sqrt(RValue<Float4> x)
3864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003866 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003867 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003868 Float4 result;
3869 result.x = Sqrt(Float(Float4(x).x));
3870 result.y = Sqrt(Float(Float4(x).y));
3871 result.z = Sqrt(Float(Float4(x).z));
3872 result.w = Sqrt(Float(Float4(x).w));
3873
3874 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003875 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003876 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003877 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003878 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003879 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003880 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3881 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3882 sqrt->addArg(x.value);
3883 ::basicBlock->appendInst(sqrt);
3884
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003885 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003886 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003887}
Nicolas Capens157ba262019-12-10 17:49:14 -05003888
3889RValue<Int> SignMask(RValue<Float4> x)
3890{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003891 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003892 if(emulateIntrinsics || CPUID::ARM)
3893 {
3894 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3895 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3896 }
3897 else
3898 {
3899 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003900 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003901 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3902 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3903 movmsk->addArg(x.value);
3904 ::basicBlock->appendInst(movmsk);
3905
3906 return RValue<Int>(V(result));
3907 }
3908}
3909
3910RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3911{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003912 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003913 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3914}
3915
3916RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3917{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003918 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003919 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3920}
3921
3922RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3923{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003924 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003925 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3926}
3927
3928RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3929{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003930 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003931 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3932}
3933
3934RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3935{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003936 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003937 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3938}
3939
3940RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3941{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003942 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003943 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
3944}
3945
3946RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3947{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003948 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003949 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3950}
3951
3952RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3953{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003954 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003955 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3956}
3957
3958RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3959{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003960 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003961 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3962}
3963
3964RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3965{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003966 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003967 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3968}
3969
3970RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3971{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003972 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003973 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3974}
3975
3976RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3977{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003978 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003979 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3980}
3981
3982RValue<Float4> Round(RValue<Float4> x)
3983{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003984 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003985 if(emulateIntrinsics || CPUID::ARM)
3986 {
3987 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3988 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
3989 }
3990 else if(CPUID::SSE4_1)
3991 {
3992 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003993 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003994 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3995 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3996 round->addArg(x.value);
3997 round->addArg(::context->getConstantInt32(0));
3998 ::basicBlock->appendInst(round);
3999
4000 return RValue<Float4>(V(result));
4001 }
4002 else
4003 {
4004 return Float4(RoundInt(x));
4005 }
4006}
4007
4008RValue<Float4> Trunc(RValue<Float4> x)
4009{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004010 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004011 if(CPUID::SSE4_1)
4012 {
4013 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004014 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004015 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4016 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4017 round->addArg(x.value);
4018 round->addArg(::context->getConstantInt32(3));
4019 ::basicBlock->appendInst(round);
4020
4021 return RValue<Float4>(V(result));
4022 }
4023 else
4024 {
4025 return Float4(Int4(x));
4026 }
4027}
4028
4029RValue<Float4> Frac(RValue<Float4> x)
4030{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004031 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004032 Float4 frc;
4033
4034 if(CPUID::SSE4_1)
4035 {
4036 frc = x - Floor(x);
4037 }
4038 else
4039 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004040 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004041
Ben Clayton713b8d32019-12-17 20:37:56 +00004042 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004043 }
4044
4045 // x - floor(x) can be 1.0 for very small negative x.
4046 // Clamp against the value just below 1.0.
4047 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4048}
4049
4050RValue<Float4> Floor(RValue<Float4> x)
4051{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004052 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004053 if(CPUID::SSE4_1)
4054 {
4055 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004056 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004057 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4058 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4059 round->addArg(x.value);
4060 round->addArg(::context->getConstantInt32(1));
4061 ::basicBlock->appendInst(round);
4062
4063 return RValue<Float4>(V(result));
4064 }
4065 else
4066 {
4067 return x - Frac(x);
4068 }
4069}
4070
4071RValue<Float4> Ceil(RValue<Float4> x)
4072{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004073 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004074 if(CPUID::SSE4_1)
4075 {
4076 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004077 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004078 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4079 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4080 round->addArg(x.value);
4081 round->addArg(::context->getConstantInt32(2));
4082 ::basicBlock->appendInst(round);
4083
4084 return RValue<Float4>(V(result));
4085 }
4086 else
4087 {
4088 return -Floor(-x);
4089 }
4090}
4091
4092Type *Float4::getType()
4093{
4094 return T(Ice::IceType_v4f32);
4095}
4096
4097RValue<Long> Ticks()
4098{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004099 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004100 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004101 return Long(Int(0));
4102}
4103
Ben Clayton713b8d32019-12-17 20:37:56 +00004104RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004105{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004106 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004107 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004108}
4109
Ben Clayton713b8d32019-12-17 20:37:56 +00004110RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004111{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004112 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004113 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004114}
4115
Ben Clayton713b8d32019-12-17 20:37:56 +00004116Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004117{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004118 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004119 Ice::Variable *ret = nullptr;
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004120 if(retTy != nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004121 {
4122 ret = ::function->makeVariable(T(retTy));
4123 }
4124 auto call = Ice::InstCall::create(::function, args.size(), ret, V(fptr.value), false);
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004125 for(auto arg : args)
Nicolas Capens157ba262019-12-10 17:49:14 -05004126 {
4127 call->addArg(V(arg));
4128 }
4129 ::basicBlock->appendInst(call);
4130 return V(ret);
4131}
4132
4133void Breakpoint()
4134{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004135 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004136 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004137 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4138 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4139 ::basicBlock->appendInst(trap);
4140}
4141
Ben Clayton713b8d32019-12-17 20:37:56 +00004142void Nucleus::createFence(std::memory_order memoryOrder)
4143{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004144 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004145 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4146 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4147 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4148 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4149 inst->addArg(order);
4150 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004151}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004152
Ben Clayton713b8d32019-12-17 20:37:56 +00004153Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4154{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004155 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004156 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004157 return nullptr;
4158}
4159void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4160{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004161 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004162 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004163}
Nicolas Capens157ba262019-12-10 17:49:14 -05004164
4165RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4166{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004167 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004168 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4169}
4170
4171RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4172{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004173 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004174 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4175}
4176
4177void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4178{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004179 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004180 return emulated::Scatter(base, val, offsets, mask, alignment);
4181}
4182
4183void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4184{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004185 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004186 return emulated::Scatter(base, val, offsets, mask, alignment);
4187}
4188
4189RValue<Float> Exp2(RValue<Float> x)
4190{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004191 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004192 return emulated::Exp2(x);
4193}
4194
4195RValue<Float> Log2(RValue<Float> x)
4196{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004197 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004198 return emulated::Log2(x);
4199}
4200
4201RValue<Float4> Sin(RValue<Float4> x)
4202{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004203 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004204 return emulated::Sin(x);
4205}
4206
4207RValue<Float4> Cos(RValue<Float4> x)
4208{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004209 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004210 return emulated::Cos(x);
4211}
4212
4213RValue<Float4> Tan(RValue<Float4> x)
4214{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004215 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004216 return emulated::Tan(x);
4217}
4218
4219RValue<Float4> Asin(RValue<Float4> x)
4220{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004221 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004222 return emulated::Asin(x);
4223}
4224
4225RValue<Float4> Acos(RValue<Float4> x)
4226{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004227 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004228 return emulated::Acos(x);
4229}
4230
4231RValue<Float4> Atan(RValue<Float4> x)
4232{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004233 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004234 return emulated::Atan(x);
4235}
4236
4237RValue<Float4> Sinh(RValue<Float4> x)
4238{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004239 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004240 return emulated::Sinh(x);
4241}
4242
4243RValue<Float4> Cosh(RValue<Float4> x)
4244{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004245 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004246 return emulated::Cosh(x);
4247}
4248
4249RValue<Float4> Tanh(RValue<Float4> x)
4250{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004251 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004252 return emulated::Tanh(x);
4253}
4254
4255RValue<Float4> Asinh(RValue<Float4> x)
4256{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004257 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004258 return emulated::Asinh(x);
4259}
4260
4261RValue<Float4> Acosh(RValue<Float4> x)
4262{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004263 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004264 return emulated::Acosh(x);
4265}
4266
4267RValue<Float4> Atanh(RValue<Float4> x)
4268{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004269 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004270 return emulated::Atanh(x);
4271}
4272
4273RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4274{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004275 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004276 return emulated::Atan2(x, y);
4277}
4278
4279RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4280{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004281 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004282 return emulated::Pow(x, y);
4283}
4284
4285RValue<Float4> Exp(RValue<Float4> x)
4286{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004287 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004288 return emulated::Exp(x);
4289}
4290
4291RValue<Float4> Log(RValue<Float4> x)
4292{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004293 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004294 return emulated::Log(x);
4295}
4296
4297RValue<Float4> Exp2(RValue<Float4> x)
4298{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004299 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004300 return emulated::Exp2(x);
4301}
4302
4303RValue<Float4> Log2(RValue<Float4> x)
4304{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004305 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004306 return emulated::Log2(x);
4307}
4308
4309RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4310{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004311 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004312 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004313 {
Ben Claytonce54c592020-02-07 11:30:51 +00004314 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004315 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004316 }
4317 else
4318 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004319 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004320 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4321 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4322 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4323 ctlz->addArg(x.value);
4324 ::basicBlock->appendInst(ctlz);
4325
4326 return RValue<UInt>(V(result));
4327 }
4328}
4329
4330RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4331{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004332 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004333 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004334 {
Ben Claytonce54c592020-02-07 11:30:51 +00004335 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004336 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004337 }
4338 else
4339 {
4340 // TODO: implement vectorized version in Subzero
4341 UInt4 result;
4342 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4343 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4344 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4345 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4346 return result;
4347 }
4348}
4349
4350RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4351{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004352 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004353 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004354 {
Ben Claytonce54c592020-02-07 11:30:51 +00004355 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004356 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004357 }
4358 else
4359 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004360 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004361 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4362 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4363 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4364 ctlz->addArg(x.value);
4365 ::basicBlock->appendInst(ctlz);
4366
4367 return RValue<UInt>(V(result));
4368 }
4369}
4370
4371RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4372{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004373 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004374 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004375 {
Ben Claytonce54c592020-02-07 11:30:51 +00004376 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004377 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004378 }
4379 else
4380 {
4381 // TODO: implement vectorized version in Subzero
4382 UInt4 result;
4383 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4384 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4385 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4386 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4387 return result;
4388 }
4389}
4390
Antonio Maiorano370cba52019-12-31 11:36:07 -05004391RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4392{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004393 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004394 return emulated::MinAtomic(x, y, memoryOrder);
4395}
4396
4397RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4398{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004399 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004400 return emulated::MinAtomic(x, y, memoryOrder);
4401}
4402
4403RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4404{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004405 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004406 return emulated::MaxAtomic(x, y, memoryOrder);
4407}
4408
4409RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4410{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004411 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004412 return emulated::MaxAtomic(x, y, memoryOrder);
4413}
4414
// Emits a print of the caller's backtrace location, but only when both
// ENABLE_RR_DEBUG_INFO and ENABLE_RR_EMIT_PRINT_LOCATION are defined.
// Otherwise this is a no-op.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO) && defined(ENABLE_RR_EMIT_PRINT_LOCATION)
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO && ENABLE_RR_EMIT_PRINT_LOCATION
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004423void EmitDebugVariable(Value *value) {}
// Intentionally empty: there is no pending debug state to flush here.
void FlushDebug()
{
}
4425
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004426namespace {
4427namespace coro {
4428
4429using FiberHandle = void *;
4430
4431// Instance data per generated coroutine
4432// This is the "handle" type used for Coroutine functions
4433// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4434struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004435{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004436 FiberHandle mainFiber{};
4437 FiberHandle routineFiber{};
4438 bool convertedFiber = false;
4439
4440 // Variables used by coroutines
4441 bool done = false;
4442 void *promisePtr = nullptr;
4443};
4444
4445CoroutineData *createCoroutineData()
4446{
4447 return new CoroutineData{};
4448}
4449
4450void destroyCoroutineData(CoroutineData *coroData)
4451{
4452 delete coroData;
4453}
4454
4455void convertThreadToMainFiber(Nucleus::CoroutineHandle handle)
4456{
4457#if defined(_WIN32)
4458 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4459
4460 coroData->mainFiber = ::ConvertThreadToFiber(nullptr);
4461
4462 if(coroData->mainFiber)
4463 {
4464 coroData->convertedFiber = true;
4465 }
4466 else
4467 {
4468 // We're probably already on a fiber, so just grab it and remember that we didn't
4469 // convert it, so not to convert back to thread.
4470 coroData->mainFiber = GetCurrentFiber();
4471 coroData->convertedFiber = false;
4472 }
4473 ASSERT(coroData->mainFiber);
4474#else
Ben Claytonce54c592020-02-07 11:30:51 +00004475 UNIMPLEMENTED_NO_BUG("convertThreadToMainFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004476#endif
4477}
4478
4479void convertMainFiberToThread(Nucleus::CoroutineHandle handle)
4480{
4481#if defined(_WIN32)
4482 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4483
4484 ASSERT(coroData->mainFiber);
4485
4486 if(coroData->convertedFiber)
4487 {
4488 ::ConvertFiberToThread();
4489 coroData->mainFiber = nullptr;
4490 }
4491#else
Ben Claytonce54c592020-02-07 11:30:51 +00004492 UNIMPLEMENTED_NO_BUG("convertMainFiberToThread not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004493#endif
4494}
4495using FiberFunc = std::function<void()>;
4496
4497void createRoutineFiber(Nucleus::CoroutineHandle handle, FiberFunc *fiberFunc)
4498{
4499#if defined(_WIN32)
4500 struct Invoker
4501 {
4502 FiberFunc func;
4503
4504 static VOID __stdcall fiberEntry(LPVOID lpParameter)
4505 {
4506 auto *func = reinterpret_cast<FiberFunc *>(lpParameter);
4507 (*func)();
4508 }
4509 };
4510
4511 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4512
4513 constexpr SIZE_T StackSize = 2 * 1024 * 1024;
4514 coroData->routineFiber = ::CreateFiber(StackSize, &Invoker::fiberEntry, fiberFunc);
4515 ASSERT(coroData->routineFiber);
4516#else
Ben Claytonce54c592020-02-07 11:30:51 +00004517 UNIMPLEMENTED_NO_BUG("createRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004518#endif
4519}
4520
4521void deleteRoutineFiber(Nucleus::CoroutineHandle handle)
4522{
4523#if defined(_WIN32)
4524 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4525 ASSERT(coroData->routineFiber);
4526 ::DeleteFiber(coroData->routineFiber);
4527 coroData->routineFiber = nullptr;
4528#else
Ben Claytonce54c592020-02-07 11:30:51 +00004529 UNIMPLEMENTED_NO_BUG("deleteRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004530#endif
4531}
4532
4533void switchToMainFiber(Nucleus::CoroutineHandle handle)
4534{
4535#if defined(_WIN32)
4536 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4537
4538 // Win32
4539 ASSERT(coroData->mainFiber);
4540 ::SwitchToFiber(coroData->mainFiber);
4541#else
Ben Claytonce54c592020-02-07 11:30:51 +00004542 UNIMPLEMENTED_NO_BUG("switchToMainFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004543#endif
4544}
4545
4546void switchToRoutineFiber(Nucleus::CoroutineHandle handle)
4547{
4548#if defined(_WIN32)
4549 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4550
4551 // Win32
4552 ASSERT(coroData->routineFiber);
4553 ::SwitchToFiber(coroData->routineFiber);
4554#else
Ben Claytonce54c592020-02-07 11:30:51 +00004555 UNIMPLEMENTED_NO_BUG("switchToRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004556#endif
4557}
4558
4559namespace detail {
4560thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4561} // namespace detail
4562
4563void setHandleParam(Nucleus::CoroutineHandle handle)
4564{
4565 ASSERT(!detail::coroHandle);
4566 detail::coroHandle = handle;
4567}
4568
4569Nucleus::CoroutineHandle getHandleParam()
4570{
4571 ASSERT(detail::coroHandle);
4572 auto handle = detail::coroHandle;
4573 detail::coroHandle = {};
4574 return handle;
4575}
4576
4577void setDone(Nucleus::CoroutineHandle handle)
4578{
4579 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4580 ASSERT(!coroData->done); // Should be called once
4581 coroData->done = true;
4582}
4583
4584bool isDone(Nucleus::CoroutineHandle handle)
4585{
4586 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4587 return coroData->done;
4588}
4589
4590void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4591{
4592 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4593 coroData->promisePtr = promisePtr;
4594}
4595
4596void *getPromisePtr(Nucleus::CoroutineHandle handle)
4597{
4598 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4599 return coroData->promisePtr;
4600}
4601
4602} // namespace coro
4603} // namespace
4604
4605// Used to generate coroutines.
4606// Lifetime: from yield to acquireCoroutine
4607class CoroutineGenerator
4608{
4609public:
4610 CoroutineGenerator()
4611 {
4612 }
4613
4614 // Inserts instructions at the top of the current function to make it a coroutine.
4615 void generateCoroutineBegin()
4616 {
4617 // Begin building the main coroutine_begin() function.
4618 // We insert these instructions at the top of the entry node,
4619 // before existing reactor-generated instructions.
4620
4621 // CoroutineHandle coroutine_begin(<Arguments>)
4622 // {
4623 // this->handle = coro::getHandleParam();
4624 //
4625 // YieldType promise;
4626 // coro::setPromisePtr(handle, &promise); // For await
4627 //
4628 // ... <REACTOR CODE> ...
4629 //
4630
4631 // Save original entry block and current block, and create a new entry block and make it current.
4632 // This new block will be used to inject code above the begin routine's existing code. We make
4633 // this block branch to the original entry block as the last instruction.
4634 auto origEntryBB = ::function->getEntryNode();
4635 auto origCurrBB = ::basicBlock;
4636 auto newBB = ::function->makeNode();
4637 sz::replaceEntryNode(::function, newBB);
4638 ::basicBlock = newBB;
4639
4640 // this->handle = coro::getHandleParam();
4641 this->handle = sz::Call(::function, ::basicBlock, coro::getHandleParam);
4642
4643 // YieldType promise;
4644 // coro::setPromisePtr(handle, &promise); // For await
4645 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
4646 sz::Call(::function, ::basicBlock, coro::setPromisePtr, this->handle, this->promise);
4647
4648 // Branch to original entry block
4649 auto br = Ice::InstBr::create(::function, origEntryBB);
4650 ::basicBlock->appendInst(br);
4651
4652 // Restore current block for future instructions
4653 ::basicBlock = origCurrBB;
4654 }
4655
4656 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4657 void generateYield(Value *val)
4658 {
4659 // ... <REACTOR CODE> ...
4660 //
4661 // promise = val;
4662 // coro::switchToMainFiber(handle);
4663 //
4664 // ... <REACTOR CODE> ...
4665
4666 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
4667 sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
4668 }
4669
4670 // Adds instructions at the end of the current main coroutine function to end the coroutine.
4671 void generateCoroutineEnd()
4672 {
4673 // ... <REACTOR CODE> ...
4674 //
4675 // coro::setDone(handle);
4676 // coro::switchToMainFiber();
4677 // // Unreachable
4678 // }
4679 //
4680
4681 sz::Call(::function, ::basicBlock, coro::setDone, this->handle);
4682
4683 // A Win32 Fiber function must not end, otherwise it tears down the thread it's running on.
4684 // So we add code to switch back to the main thread.
4685 sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
4686 }
4687
4688 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4689
4690 // Generates the await function for the current coroutine.
4691 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4692 static FunctionUniquePtr generateAwaitFunction()
4693 {
4694 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4695 // {
4696 // if (coro::isDone())
4697 // {
4698 // return false;
4699 // }
4700 // else // resume
4701 // {
4702 // YieldType* promise = coro::getPromisePtr(handle);
4703 // *out = *promise;
4704 // coro::switchToRoutineFiber(handle);
4705 // return true;
4706 // }
4707 // }
4708
4709 // Subzero doesn't support bool types (IceType_i1) as return type
4710 const Ice::Type ReturnType = Ice::IceType_i32;
4711 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4712 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4713
4714 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4715 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4716
4717 Ice::Variable *handle = awaitFunc->getArgs()[0];
4718 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4719
4720 auto doneBlock = awaitFunc->makeNode();
4721 {
4722 // return false;
4723 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4724 doneBlock->appendInst(ret);
4725 }
4726
4727 auto resumeBlock = awaitFunc->makeNode();
4728 {
4729 // YieldType* promise = coro::getPromisePtr(handle);
4730 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4731
4732 // *out = *promise;
4733 // Load promise value
4734 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4735 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4736 resumeBlock->appendInst(load);
4737 // Then store it in output param
4738 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4739 resumeBlock->appendInst(store);
4740
4741 // coro::switchToRoutineFiber(handle);
4742 sz::Call(awaitFunc, resumeBlock, coro::switchToRoutineFiber, handle);
4743
4744 // return true;
4745 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4746 resumeBlock->appendInst(ret);
4747 }
4748
4749 // if (coro::isDone())
4750 // {
4751 // <doneBlock>
4752 // }
4753 // else // resume
4754 // {
4755 // <resumeBlock>
4756 // }
4757 Ice::CfgNode *bb = awaitFunc->getEntryNode();
4758 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone);
4759 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4760 bb->appendInst(br);
4761
4762 return FunctionUniquePtr{ awaitFunc };
4763 }
4764
// Generates the destroy function for the current coroutine.
// Cannot use Nucleus functions that modify ::function and ::basicBlock.
4767 static FunctionUniquePtr generateDestroyFunction()
4768 {
4769 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4770 // {
4771 // coro::convertMainFiberToThread(coroData);
4772 // coro::deleteRoutineFiber(handle);
4773 // coro::destroyCoroutineData(handle);
4774 // return;
4775 // }
4776
4777 const Ice::Type ReturnType = Ice::IceType_void;
4778 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4779
4780 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4781 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4782
4783 Ice::Variable *handle = destroyFunc->getArgs()[0];
4784
4785 auto *bb = destroyFunc->getEntryNode();
4786
4787 // coro::convertMainFiberToThread(coroData);
4788 sz::Call(destroyFunc, bb, coro::convertMainFiberToThread, handle);
4789
4790 // coro::deleteRoutineFiber(handle);
4791 sz::Call(destroyFunc, bb, coro::deleteRoutineFiber, handle);
4792
4793 // coro::destroyCoroutineData(handle);
4794 sz::Call(destroyFunc, bb, coro::destroyCoroutineData, handle);
4795
4796 // return;
4797 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4798 bb->appendInst(ret);
4799
4800 return FunctionUniquePtr{ destroyFunc };
4801 }
4802
4803private:
4804 Ice::Variable *handle{};
4805 Ice::Variable *promise{};
4806};
4807
4808static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4809{
4810 // This doubles up as our coroutine handle
4811 auto coroData = coro::createCoroutineData();
4812
4813 // Convert current thread to a fiber so we can create new fibers and switch to them
4814 coro::convertThreadToMainFiber(coroData);
4815
4816 coro::FiberFunc fiberFunc = [&]() {
4817 // Store handle in TLS so that the coroutine can grab it right away, before
4818 // any fiber switch occurs.
4819 coro::setHandleParam(coroData);
4820
4821 // Invoke the begin function in the context of the routine fiber
4822 beginFunc();
4823
4824 // Either it yielded, or finished. In either case, we switch back to the main fiber.
4825 // We don't ever return from this function, or the current thread will be destroyed.
4826 coro::switchToMainFiber(coroData);
4827 };
4828
4829 coro::createRoutineFiber(coroData, &fiberFunc);
4830
4831 // Fiber will now start running, executing the saved beginFunc
4832 coro::switchToRoutineFiber(coroData);
4833
4834 return coroData;
4835}
4836
4837void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4838{
4839 // Start by creating a regular function
4840 createFunction(yieldType, params);
4841
4842 // Save in case yield() is called
4843 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4844 ::coroYieldType = yieldType;
4845}
4846
4847void Nucleus::yield(Value *val)
4848{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004849 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004850 Variable::materializeAll();
4851
4852 // On first yield, we start generating coroutine functions
4853 if(!::coroGen)
4854 {
4855 ::coroGen = std::make_shared<CoroutineGenerator>();
4856 ::coroGen->generateCoroutineBegin();
4857 }
4858
4859 ASSERT(::coroGen);
4860 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004861}
4862
Ben Clayton713b8d32019-12-17 20:37:56 +00004863static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4864{
4865 return false;
4866}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004867
4868static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4869{
4870}
Nicolas Capens157ba262019-12-10 17:49:14 -05004871
4872std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4873{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004874 if(::coroGen)
4875 {
4876 // Finish generating coroutine functions
4877 {
4878 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4879 ::coroGen->generateCoroutineEnd();
4880 createRetVoidIfNoRet();
4881 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004882
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004883 auto awaitFunc = ::coroGen->generateAwaitFunction();
4884 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004885
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004886 // At this point, we no longer need the CoroutineGenerator.
4887 ::coroGen.reset();
4888 ::coroYieldType = nullptr;
4889
4890 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4891 { name, "await", "destroy" },
4892 cfgEdit);
4893
4894 return routine;
4895 }
4896 else
4897 {
4898 {
4899 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4900 createRetVoidIfNoRet();
4901 }
4902
4903 ::coroYieldType = nullptr;
4904
4905 // Not an actual coroutine (no yields), so return stubs for await and destroy
4906 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4907
4908 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4909 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4910 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4911 return routine;
4912 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004913}
4914
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004915Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004916{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004917 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4918
4919 if(isCoroutine)
4920 {
4921 return rr::invokeCoroutineBegin(func);
4922 }
4923 else
4924 {
4925 // For regular routines, just invoke the begin func directly
4926 return func();
4927 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004928}
Nicolas Capens157ba262019-12-10 17:49:14 -05004929
4930} // namespace rr