blob: 86838628f2e1ab15d02748ced7533ea173975339 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Nicolas Capens598f8d82016-09-26 15:09:10 -040015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050017#include "EmulatedReactor.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040018
Nicolas Capens2ae9d742016-11-24 14:43:05 -050019#include "Optimizer.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040020#include "ExecutableMemory.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040021
Nicolas Capens598f8d82016-09-26 15:09:10 -040022#include "src/IceTypes.h"
23#include "src/IceCfg.h"
24#include "src/IceELFStreamer.h"
25#include "src/IceGlobalContext.h"
26#include "src/IceCfgNode.h"
27#include "src/IceELFObjectWriter.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040028#include "src/IceGlobalInits.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040029
30#include "llvm/Support/FileSystem.h"
31#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040032#include "llvm/Support/Compiler.h"
33
34#if __has_feature(memory_sanitizer)
35#include <sanitizer/msan_interface.h>
36#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040037
Nicolas Capensbd65da92017-01-05 16:31:06 -050038#if defined(_WIN32)
Alexis Hetu113e33a2017-01-19 10:49:19 -050039#ifndef WIN32_LEAN_AND_MEAN
Nicolas Capens598f8d82016-09-26 15:09:10 -040040#define WIN32_LEAN_AND_MEAN
Alexis Hetu113e33a2017-01-19 10:49:19 -050041#endif // !WIN32_LEAN_AND_MEAN
42#ifndef NOMINMAX
Nicolas Capens598f8d82016-09-26 15:09:10 -040043#define NOMINMAX
Alexis Hetu113e33a2017-01-19 10:49:19 -050044#endif // !NOMINMAX
Nicolas Capens598f8d82016-09-26 15:09:10 -040045#include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050046#else
47#include <sys/mman.h>
Nicolas Capens411273e2017-01-26 15:13:36 -080048#if !defined(MAP_ANONYMOUS)
49#define MAP_ANONYMOUS MAP_ANON
Nicolas Capens8b275742017-01-20 17:11:41 -050050#endif
Nicolas Capensbd65da92017-01-05 16:31:06 -050051#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040052
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040053#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040054#include <limits>
55#include <iostream>
Nicolas Capens598f8d82016-09-26 15:09:10 -040056
Nicolas Capens157ba262019-12-10 17:49:14 -050057namespace rr { class ELFMemoryStreamer; }
58
59namespace {
60
61// Default configuration settings. Must be accessed under mutex lock.
62std::mutex defaultConfigLock;
63rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +000064{
Nicolas Capens157ba262019-12-10 17:49:14 -050065 // This uses a static in a function to avoid the cost of a global static
66 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
67 static rr::Config config = rr::Config::Edit()
68 .apply({});
69 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +000070}
71
Nicolas Capens157ba262019-12-10 17:49:14 -050072Ice::GlobalContext *context = nullptr;
73Ice::Cfg *function = nullptr;
74Ice::CfgNode *basicBlock = nullptr;
75Ice::CfgLocalAllocatorScope *allocator = nullptr;
76rr::ELFMemoryStreamer *routine = nullptr;
77
78std::mutex codegenMutex;
79
80Ice::ELFFileStreamer *elfFile = nullptr;
81Ice::Fdstream *out = nullptr;
82
83} // Anonymous namespace
84
85namespace {
86
87#if !defined(__i386__) && defined(_M_IX86)
88 #define __i386__ 1
89#endif
90
91#if !defined(__x86_64__) && (defined(_M_AMD64) || defined (_M_X64))
92 #define __x86_64__ 1
93#endif
94
95static Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -040096{
Nicolas Capens157ba262019-12-10 17:49:14 -050097 switch (level)
Ben Clayton55bc37a2019-07-04 12:17:12 +010098 {
Nicolas Capens157ba262019-12-10 17:49:14 -050099 // Note that Opt_0 and Opt_1 are not implemented by Subzero
100 case rr::Optimization::Level::None: return Ice::Opt_m1;
101 case rr::Optimization::Level::Less: return Ice::Opt_m1;
102 case rr::Optimization::Level::Default: return Ice::Opt_2;
103 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
104 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100105 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500106 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400107}
108
// Host CPU feature detection. The public flags are initialized once, from the
// private detect*() helpers, by the out-of-class definitions of the statics.
class CPUID
{
public:
	const static bool ARM;
	const static bool SSE4_1;

private:
	// Runs the x86 CPUID instruction for leaf `info`, storing EAX, EBX, ECX and
	// EDX in registers[0..3]. On non-x86 targets all four entries are zeroed.
	static void cpuid(int registers[4], int info)
	{
		#if defined(__i386__) || defined(__x86_64__)
			#if defined(_WIN32)
				__cpuid(registers, info);
			#else
				__asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
			#endif
		#else
			for(int i = 0; i < 4; i++)
			{
				registers[i] = 0;
			}
		#endif
	}

	// Decided entirely at compile time from the target architecture macros.
	static bool detectARM()
	{
		#if defined(__arm__) || defined(__aarch64__)
			return true;
		#elif defined(__i386__) || defined(__x86_64__)
			return false;
		#elif defined(__mips__)
			return false;
		#else
			#error "Unknown architecture"
		#endif
	}

	// Queries CPUID leaf 1 and tests the SSE4.1 bit of the ECX result.
	// Always false on non-x86 targets.
	static bool detectSSE4_1()
	{
		#if defined(__i386__) || defined(__x86_64__)
			constexpr int sse41Mask = 0x00080000;

			int registers[4];
			cpuid(registers, 1);

			const int ecx = registers[2];
			return (ecx & sse41Mask) != 0;
		#else
			return false;
		#endif
	}
};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500156
Nicolas Capens157ba262019-12-10 17:49:14 -0500157const bool CPUID::ARM = CPUID::detectARM();
158const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
159const bool emulateIntrinsics = false;
160const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400161
Nicolas Capens157ba262019-12-10 17:49:14 -0500162constexpr bool subzeroDumpEnabled = false;
163constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500164
165#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500166static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
167static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500168#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500169
170} // anonymous namespace
171
172namespace rr {
173
174const Capabilities Caps =
175{
176 false, // CoroutinesSupported
177};
178
179enum EmulatedType
180{
181 EmulatedShift = 16,
182 EmulatedV2 = 2 << EmulatedShift,
183 EmulatedV4 = 4 << EmulatedShift,
184 EmulatedV8 = 8 << EmulatedShift,
185 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
186
187 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
188 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
189 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
190 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
191 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
192 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
193};
194
195class Value : public Ice::Operand {};
196class SwitchCases : public Ice::InstSwitch {};
197class BasicBlock : public Ice::CfgNode {};
198
199Ice::Type T(Type *t)
200{
201 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
202 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500203}
204
Nicolas Capens157ba262019-12-10 17:49:14 -0500205Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400206{
Nicolas Capens157ba262019-12-10 17:49:14 -0500207 return reinterpret_cast<Type*>(t);
208}
209
210Type *T(EmulatedType t)
211{
212 return reinterpret_cast<Type*>(t);
213}
214
215Value *V(Ice::Operand *v)
216{
217 return reinterpret_cast<Value*>(v);
218}
219
220BasicBlock *B(Ice::CfgNode *b)
221{
222 return reinterpret_cast<BasicBlock*>(b);
223}
224
225static size_t typeSize(Type *type)
226{
227 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400228 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500229 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800230 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500231 case Type_v2i32: return 8;
232 case Type_v4i16: return 8;
233 case Type_v2i16: return 4;
234 case Type_v8i8: return 8;
235 case Type_v4i8: return 4;
236 case Type_v2f32: return 8;
237 default: ASSERT(false);
238 }
239 }
240
241 return Ice::typeWidthInBytes(T(type));
242}
243
244using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
245using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
246
247inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
248{
249 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
250}
251
252inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
253{
254 return &sectionHeader(elfHeader)[index];
255}
256
257static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
258{
259 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
260
261 uint32_t index = relocation.getSymbol();
262 int table = relocationTable.sh_link;
263 void *symbolValue = nullptr;
264
265 if(index != SHN_UNDEF)
266 {
267 if(table == SHN_UNDEF) return nullptr;
268 const SectionHeader *symbolTable = elfSection(elfHeader, table);
269
270 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
271 if(index >= symtab_entries)
272 {
273 ASSERT(index < symtab_entries && "Symbol Index out of range");
274 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800275 }
276
Nicolas Capens157ba262019-12-10 17:49:14 -0500277 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
278 Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
279 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800280
Nicolas Capens157ba262019-12-10 17:49:14 -0500281 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400282 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500283 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
284 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400285 }
286 else
287 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500288 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400289 }
Nicolas Capens66478362016-10-13 15:36:36 -0400290 }
291
Nicolas Capens157ba262019-12-10 17:49:14 -0500292 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
293 unaligned_ptr<int32_t> patchSite = (int32_t*)(address + relocation.r_offset);
294
295 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400296 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400297 switch(relocation.getType())
298 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500299 case R_ARM_NONE:
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400300 // No relocation
301 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500302 case R_ARM_MOVW_ABS_NC:
303 {
304 uint32_t thumb = 0; // Calls to Thumb code not supported.
305 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
306 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
307 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400308 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500309 case R_ARM_MOVT_ABS:
310 {
311 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
312 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
313 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400314 break;
315 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400316 ASSERT(false && "Unsupported relocation type");
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400317 return nullptr;
318 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500319 }
320 else
321 {
322 switch(relocation.getType())
323 {
324 case R_386_NONE:
325 // No relocation
326 break;
327 case R_386_32:
328 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
329 break;
330 case R_386_PC32:
331 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
332 break;
333 default:
334 ASSERT(false && "Unsupported relocation type");
335 return nullptr;
336 }
Nicolas Capens66478362016-10-13 15:36:36 -0400337 }
338
Nicolas Capens157ba262019-12-10 17:49:14 -0500339 return symbolValue;
340}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400341
Nicolas Capens157ba262019-12-10 17:49:14 -0500342static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
343{
344 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
345
346 uint32_t index = relocation.getSymbol();
347 int table = relocationTable.sh_link;
348 void *symbolValue = nullptr;
349
350 if(index != SHN_UNDEF)
351 {
352 if(table == SHN_UNDEF) return nullptr;
353 const SectionHeader *symbolTable = elfSection(elfHeader, table);
354
355 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
356 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400357 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500358 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400359 return nullptr;
360 }
361
Nicolas Capens157ba262019-12-10 17:49:14 -0500362 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
363 Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
364 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400365
Nicolas Capens157ba262019-12-10 17:49:14 -0500366 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400367 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500368 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
369 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
370 }
371 else
372 {
373 return nullptr;
374 }
375 }
Nicolas Capens66478362016-10-13 15:36:36 -0400376
Nicolas Capens157ba262019-12-10 17:49:14 -0500377 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
378 unaligned_ptr<int32_t> patchSite32 = (int32_t*)(address + relocation.r_offset);
379 unaligned_ptr<int64_t> patchSite64 = (int64_t*)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400380
Nicolas Capens157ba262019-12-10 17:49:14 -0500381 switch(relocation.getType())
382 {
383 case R_X86_64_NONE:
384 // No relocation
385 break;
386 case R_X86_64_64:
387 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
388 break;
389 case R_X86_64_PC32:
390 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
391 break;
392 case R_X86_64_32S:
393 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
394 break;
395 default:
396 ASSERT(false && "Unsupported relocation type");
397 return nullptr;
398 }
399
400 return symbolValue;
401}
402
403void *loadImage(uint8_t *const elfImage, size_t &codeSize)
404{
405 ElfHeader *elfHeader = (ElfHeader*)elfImage;
406
407 if(!elfHeader->checkMagic())
408 {
409 return nullptr;
410 }
411
412 // Expect ELF bitness to match platform
413 ASSERT(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
414 #if defined(__i386__)
415 ASSERT(sizeof(void*) == 4 && elfHeader->e_machine == EM_386);
416 #elif defined(__x86_64__)
417 ASSERT(sizeof(void*) == 8 && elfHeader->e_machine == EM_X86_64);
418 #elif defined(__arm__)
419 ASSERT(sizeof(void*) == 4 && elfHeader->e_machine == EM_ARM);
420 #elif defined(__aarch64__)
421 ASSERT(sizeof(void*) == 8 && elfHeader->e_machine == EM_AARCH64);
422 #elif defined(__mips__)
423 ASSERT(sizeof(void*) == 4 && elfHeader->e_machine == EM_MIPS);
424 #else
425 #error "Unsupported platform"
426 #endif
427
428 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
429 void *entry = nullptr;
430
431 for(int i = 0; i < elfHeader->e_shnum; i++)
432 {
433 if(sectionHeader[i].sh_type == SHT_PROGBITS)
434 {
435 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
436 {
437 entry = elfImage + sectionHeader[i].sh_offset;
438 codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400439 }
440 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500441 else if(sectionHeader[i].sh_type == SHT_REL)
442 {
443 ASSERT(sizeof(void*) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400444
Nicolas Capens157ba262019-12-10 17:49:14 -0500445 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
446 {
447 const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
448 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
449 }
450 }
451 else if(sectionHeader[i].sh_type == SHT_RELA)
452 {
453 ASSERT(sizeof(void*) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
454
455 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
456 {
457 const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
458 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
459 }
460 }
461 }
462
463 return entry;
464}
465
466template<typename T>
467struct ExecutableAllocator
468{
469 ExecutableAllocator() {}
470 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {}
471
472 using value_type = T;
473 using size_type = std::size_t;
474
475 T *allocate(size_type n)
476 {
477 return (T*)allocateExecutable(sizeof(T) * n);
478 }
479
480 void deallocate(T *p, size_type n)
481 {
482 deallocateExecutable(p, sizeof(T) * n);
483 }
484};
485
486class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
487{
488 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
489 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
490
491public:
492 ELFMemoryStreamer() : Routine()
493 {
494 position = 0;
495 buffer.reserve(0x1000);
496 }
497
498 ~ELFMemoryStreamer() override
499 {
500 #if defined(_WIN32)
501 if(buffer.size() != 0)
502 {
503 DWORD exeProtection;
504 VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
505 }
506 #endif
507 }
508
509 void write8(uint8_t Value) override
510 {
511 if(position == (uint64_t)buffer.size())
512 {
513 buffer.push_back(Value);
514 position++;
515 }
516 else if(position < (uint64_t)buffer.size())
517 {
518 buffer[position] = Value;
519 position++;
520 }
521 else ASSERT(false && "UNIMPLEMENTED");
522 }
523
524 void writeBytes(llvm::StringRef Bytes) override
525 {
526 std::size_t oldSize = buffer.size();
527 buffer.resize(oldSize + Bytes.size());
528 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
529 position += Bytes.size();
530 }
531
532 uint64_t tell() const override { return position; }
533
534 void seek(uint64_t Off) override { position = Off; }
535
536 const void* finalizeEntryBegin()
537 {
538 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
539
540 size_t codeSize = 0;
541 const void *entry = loadImage(&buffer[0], codeSize);
542
543#if defined(_WIN32)
544 VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READ, &oldProtection);
545 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
546#else
547 mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_EXEC);
548 __builtin___clear_cache((char*)entry, (char*)entry + codeSize);
549#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -0400550 return entry;
551 }
552
Nicolas Capens157ba262019-12-10 17:49:14 -0500553 void setEntry(int index, const void* func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400554 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500555 ASSERT(func);
556 funcs[index] = func;
557 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400558
Nicolas Capens157ba262019-12-10 17:49:14 -0500559 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400560 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500561 ASSERT(funcs[index]);
562 return funcs[index];
563 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400564
Nicolas Capens157ba262019-12-10 17:49:14 -0500565 const void* addConstantData(const void* data, size_t size)
566 {
567 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[size]);
568 memcpy(buf.get(), data, size);
569 auto ptr = buf.get();
570 constantData.emplace_back(std::move(buf));
571 return ptr;
572 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400573
Nicolas Capens157ba262019-12-10 17:49:14 -0500574private:
575 std::array<const void*, Nucleus::CoroutineEntryCount> funcs = {};
576 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
577 std::size_t position;
578 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400579
Nicolas Capens157ba262019-12-10 17:49:14 -0500580 #if defined(_WIN32)
581 DWORD oldProtection;
582 #endif
583};
584
585Nucleus::Nucleus()
586{
587 ::codegenMutex.lock(); // Reactor is currently not thread safe
588
589 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
590 Ice::ClFlags::getParsedClFlags(Flags);
591
592 #if defined(__arm__)
593 Flags.setTargetArch(Ice::Target_ARM32);
594 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
595 #elif defined(__mips__)
596 Flags.setTargetArch(Ice::Target_MIPS32);
597 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
598 #else // x86
599 Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
600 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
601 #endif
602 Flags.setOutFileType(Ice::FT_Elf);
603 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
604 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
605 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
606 Flags.setDisableHybridAssembly(true);
607
608 static llvm::raw_os_ostream cout(std::cout);
609 static llvm::raw_os_ostream cerr(std::cerr);
610
611 if (subzeroEmitTextAsm)
612 {
613 // Decorate text asm with liveness info
614 Flags.setDecorateAsm(true);
615 }
616
617 if(false) // Write out to a file
618 {
619 std::error_code errorCode;
620 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
621 ::elfFile = new Ice::ELFFileStreamer(*out);
622 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
623 }
624 else
625 {
626 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
627 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
628 ::routine = elfMemory;
629 }
630}
631
632Nucleus::~Nucleus()
633{
634 delete ::routine;
635
636 delete ::allocator;
637 delete ::function;
638 delete ::context;
639
640 delete ::elfFile;
641 delete ::out;
642
643 ::codegenMutex.unlock();
644}
645
646void Nucleus::setDefaultConfig(const Config &cfg)
647{
648 std::unique_lock<std::mutex> lock(::defaultConfigLock);
649 ::defaultConfig() = cfg;
650}
651
652void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
653{
654 std::unique_lock<std::mutex> lock(::defaultConfigLock);
655 auto &config = ::defaultConfig();
656 config = cfgEdit.apply(config);
657}
658
659Config Nucleus::getDefaultConfig()
660{
661 std::unique_lock<std::mutex> lock(::defaultConfigLock);
662 return ::defaultConfig();
663}
664
665std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
666{
667 if (subzeroDumpEnabled)
668 {
669 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
670 context->getStrDump().SetUnbuffered();
671 }
672
673 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
674 {
675 createRetVoid();
676 }
677
678 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, name));
679
680 rr::optimize(::function);
681
682 ::function->computeInOutEdges();
683 ASSERT(!::function->hasError());
684
685 ::function->translate();
686 ASSERT(!::function->hasError());
687
688 auto globals = ::function->getGlobalInits();
689
690 if(globals && !globals->empty())
691 {
692 ::context->getGlobals()->merge(globals.get());
693 }
694
695 ::context->emitFileHeader();
696
697 if (subzeroEmitTextAsm)
698 {
699 ::function->emit();
700 }
701
702 ::function->emitIAS();
703 auto assembler = ::function->releaseAssembler();
704 auto objectWriter = ::context->getObjectWriter();
705 assembler->alignFunction();
706 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
707 ::context->lowerGlobals("last");
708 ::context->lowerConstants();
709 ::context->lowerJumpTables();
710 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
711 objectWriter->writeNonUserSections();
712
713 const void* entryBegin = ::routine->finalizeEntryBegin();
714 ::routine->setEntry(Nucleus::CoroutineEntryBegin, entryBegin);
715
716 Routine *handoffRoutine = ::routine;
717 ::routine = nullptr;
718
719 return std::shared_ptr<Routine>(handoffRoutine);
720}
721
722Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
723{
724 Ice::Type type = T(t);
725 int typeSize = Ice::typeWidthInBytes(type);
726 int totalSize = typeSize * (arraySize ? arraySize : 1);
727
728 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
729 auto address = ::function->makeVariable(T(getPointerType(t)));
730 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
731 ::function->getEntryNode()->getInsts().push_front(alloca);
732
733 return V(address);
734}
735
736BasicBlock *Nucleus::createBasicBlock()
737{
738 return B(::function->makeNode());
739}
740
741BasicBlock *Nucleus::getInsertBlock()
742{
743 return B(::basicBlock);
744}
745
746void Nucleus::setInsertBlock(BasicBlock *basicBlock)
747{
748// ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
749
750 Variable::materializeAll();
751
752 ::basicBlock = basicBlock;
753}
754
755void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
756{
757 uint32_t sequenceNumber = 0;
758 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
759 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
760
761 for(Type *type : Params)
762 {
763 Ice::Variable *arg = ::function->makeVariable(T(type));
764 ::function->addArg(arg);
765 }
766
767 Ice::CfgNode *node = ::function->makeNode();
768 ::function->setEntryNode(node);
769 ::basicBlock = node;
770}
771
772Value *Nucleus::getArgument(unsigned int index)
773{
774 return V(::function->getArgs()[index]);
775}
776
777void Nucleus::createRetVoid()
778{
779 // Code generated after this point is unreachable, so any variables
780 // being read can safely return an undefined value. We have to avoid
781 // materializing variables after the terminator ret instruction.
782 Variable::killUnmaterialized();
783
784 Ice::InstRet *ret = Ice::InstRet::create(::function);
785 ::basicBlock->appendInst(ret);
786}
787
788void Nucleus::createRet(Value *v)
789{
790 // Code generated after this point is unreachable, so any variables
791 // being read can safely return an undefined value. We have to avoid
792 // materializing variables after the terminator ret instruction.
793 Variable::killUnmaterialized();
794
795 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
796 ::basicBlock->appendInst(ret);
797}
798
799void Nucleus::createBr(BasicBlock *dest)
800{
801 Variable::materializeAll();
802
803 auto br = Ice::InstBr::create(::function, dest);
804 ::basicBlock->appendInst(br);
805}
806
807void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
808{
809 Variable::materializeAll();
810
811 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
812 ::basicBlock->appendInst(br);
813}
814
815static bool isCommutative(Ice::InstArithmetic::OpKind op)
816{
817 switch(op)
818 {
819 case Ice::InstArithmetic::Add:
820 case Ice::InstArithmetic::Fadd:
821 case Ice::InstArithmetic::Mul:
822 case Ice::InstArithmetic::Fmul:
823 case Ice::InstArithmetic::And:
824 case Ice::InstArithmetic::Or:
825 case Ice::InstArithmetic::Xor:
826 return true;
827 default:
828 return false;
829 }
830}
831
832static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
833{
834 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
835
836 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
837
838 Ice::Variable *result = ::function->makeVariable(lhs->getType());
839 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
840 ::basicBlock->appendInst(arithmetic);
841
842 return V(result);
843}
844
845Value *Nucleus::createAdd(Value *lhs, Value *rhs)
846{
847 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
848}
849
850Value *Nucleus::createSub(Value *lhs, Value *rhs)
851{
852 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
853}
854
855Value *Nucleus::createMul(Value *lhs, Value *rhs)
856{
857 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
858}
859
860Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
861{
862 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
863}
864
865Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
866{
867 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
868}
869
870Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
871{
872 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
873}
874
875Value *Nucleus::createFSub(Value *lhs, Value *rhs)
876{
877 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
878}
879
880Value *Nucleus::createFMul(Value *lhs, Value *rhs)
881{
882 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
883}
884
885Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
886{
887 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
888}
889
890Value *Nucleus::createURem(Value *lhs, Value *rhs)
891{
892 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
893}
894
895Value *Nucleus::createSRem(Value *lhs, Value *rhs)
896{
897 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
898}
899
900Value *Nucleus::createFRem(Value *lhs, Value *rhs)
901{
902 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
903}
904
905Value *Nucleus::createShl(Value *lhs, Value *rhs)
906{
907 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
908}
909
910Value *Nucleus::createLShr(Value *lhs, Value *rhs)
911{
912 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
913}
914
915Value *Nucleus::createAShr(Value *lhs, Value *rhs)
916{
917 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
918}
919
920Value *Nucleus::createAnd(Value *lhs, Value *rhs)
921{
922 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
923}
924
925Value *Nucleus::createOr(Value *lhs, Value *rhs)
926{
927 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
928}
929
930Value *Nucleus::createXor(Value *lhs, Value *rhs)
931{
932 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
933}
934
935Value *Nucleus::createNeg(Value *v)
936{
937 return createSub(createNullValue(T(v->getType())), v);
938}
939
940Value *Nucleus::createFNeg(Value *v)
941{
942 double c[4] = {-0.0, -0.0, -0.0, -0.0};
943 Value *negativeZero = Ice::isVectorType(v->getType()) ?
944 createConstantVector(c, T(v->getType())) :
945 V(::context->getConstantFloat(-0.0f));
946
947 return createFSub(negativeZero, v);
948}
949
950Value *Nucleus::createNot(Value *v)
951{
952 if(Ice::isScalarIntegerType(v->getType()))
953 {
954 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
955 }
956 else // Vector
957 {
958 int64_t c[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
959 return createXor(v, createConstantVector(c, T(v->getType())));
960 }
961}
962
963Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
964{
965 ASSERT(!atomic); // Unimplemented
966 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
967
968 int valueType = (int)reinterpret_cast<intptr_t>(type);
969 Ice::Variable *result = ::function->makeVariable(T(type));
970
971 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
972 {
973 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400974 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500975 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400976 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500977 auto pointer = RValue<Pointer<Byte>>(ptr);
978 Int x = *Pointer<Int>(pointer);
979
980 Int4 vector;
981 vector = Insert(vector, x, 0);
982
983 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
984 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400985 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500986 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400987 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500988 auto pointer = RValue<Pointer<Byte>>(ptr);
989 Int x = *Pointer<Int>(pointer);
990 Int y = *Pointer<Int>(pointer + 4);
991
992 Int4 vector;
993 vector = Insert(vector, x, 0);
994 vector = Insert(vector, y, 1);
995
996 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
997 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400998 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500999 else UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001000 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001001 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001002 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001003 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1004 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1005 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1006 load->addArg(ptr);
1007 load->addArg(::context->getConstantInt32(typeSize(type)));
1008 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001009 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001010 }
1011 else
1012 {
1013 auto load = Ice::InstLoad::create(::function, result, ptr, align);
1014 ::basicBlock->appendInst(load);
1015 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001016
Nicolas Capens157ba262019-12-10 17:49:14 -05001017 return V(result);
1018}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001019
Nicolas Capens157ba262019-12-10 17:49:14 -05001020Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1021{
1022 ASSERT(!atomic); // Unimplemented
1023 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
Nicolas Capens598f8d82016-09-26 15:09:10 -04001024
Nicolas Capens157ba262019-12-10 17:49:14 -05001025 #if __has_feature(memory_sanitizer)
1026 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
1027 if(align != 0)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001028 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001029 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1030 call->addArg(ptr);
1031 call->addArg(::context->getConstantInt64(typeSize(type)));
1032 ::basicBlock->appendInst(call);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001033 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001034 #endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001035
Nicolas Capens157ba262019-12-10 17:49:14 -05001036 int valueType = (int)reinterpret_cast<intptr_t>(type);
1037
1038 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
1039 {
1040 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001041 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001042 if(typeSize(type) == 4)
1043 {
1044 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1045 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1046 ::basicBlock->appendInst(bitcast);
1047
1048 RValue<Int4> v(V(vector));
1049
1050 auto pointer = RValue<Pointer<Byte>>(ptr);
1051 Int x = Extract(v, 0);
1052 *Pointer<Int>(pointer) = x;
1053 }
1054 else if(typeSize(type) == 8)
1055 {
1056 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1057 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1058 ::basicBlock->appendInst(bitcast);
1059
1060 RValue<Int4> v(V(vector));
1061
1062 auto pointer = RValue<Pointer<Byte>>(ptr);
1063 Int x = Extract(v, 0);
1064 *Pointer<Int>(pointer) = x;
1065 Int y = Extract(v, 1);
1066 *Pointer<Int>(pointer + 4) = y;
1067 }
1068 else UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001069 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001070 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001071 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001072 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
1073 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1074 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1075 store->addArg(value);
1076 store->addArg(ptr);
1077 store->addArg(::context->getConstantInt32(typeSize(type)));
1078 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001079 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001080 }
1081 else
1082 {
1083 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001084
Nicolas Capens157ba262019-12-10 17:49:14 -05001085 auto store = Ice::InstStore::create(::function, value, ptr, align);
1086 ::basicBlock->appendInst(store);
1087 }
1088
1089 return value;
1090}
1091
1092Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1093{
1094 ASSERT(index->getType() == Ice::IceType_i32);
1095
1096 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1097 {
1098 int32_t offset = constant->getValue() * (int)typeSize(type);
1099
1100 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001101 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001102 return ptr;
1103 }
1104
Nicolas Capens157ba262019-12-10 17:49:14 -05001105 return createAdd(ptr, createConstantInt(offset));
1106 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001107
Nicolas Capens157ba262019-12-10 17:49:14 -05001108 if(!Ice::isByteSizedType(T(type)))
1109 {
1110 index = createMul(index, createConstantInt((int)typeSize(type)));
1111 }
1112
1113 if(sizeof(void*) == 8)
1114 {
1115 if(unsignedIndex)
1116 {
1117 index = createZExt(index, T(Ice::IceType_i64));
1118 }
1119 else
1120 {
1121 index = createSExt(index, T(Ice::IceType_i64));
1122 }
1123 }
1124
1125 return createAdd(ptr, index);
1126}
1127
1128Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1129{
1130 UNIMPLEMENTED("createAtomicAdd");
1131 return nullptr;
1132}
1133
1134Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1135{
1136 UNIMPLEMENTED("createAtomicSub");
1137 return nullptr;
1138}
1139
1140Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1141{
1142 UNIMPLEMENTED("createAtomicAnd");
1143 return nullptr;
1144}
1145
1146Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1147{
1148 UNIMPLEMENTED("createAtomicOr");
1149 return nullptr;
1150}
1151
1152Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1153{
1154 UNIMPLEMENTED("createAtomicXor");
1155 return nullptr;
1156}
1157
1158Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1159{
1160 UNIMPLEMENTED("createAtomicMin");
1161 return nullptr;
1162}
1163
1164Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1165{
1166 UNIMPLEMENTED("createAtomicMax");
1167 return nullptr;
1168}
1169
1170Value *Nucleus::createAtomicUMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1171{
1172 UNIMPLEMENTED("createAtomicUMin");
1173 return nullptr;
1174}
1175
1176Value *Nucleus::createAtomicUMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1177{
1178 UNIMPLEMENTED("createAtomicUMax");
1179 return nullptr;
1180}
1181
1182Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1183{
1184 UNIMPLEMENTED("createAtomicExchange");
1185 return nullptr;
1186}
1187
1188Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1189{
1190 UNIMPLEMENTED("createAtomicCompareExchange");
1191 return nullptr;
1192}
1193
1194static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1195{
1196 if(v->getType() == T(destType))
1197 {
1198 return v;
1199 }
1200
1201 Ice::Variable *result = ::function->makeVariable(T(destType));
1202 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1203 ::basicBlock->appendInst(cast);
1204
1205 return V(result);
1206}
1207
1208Value *Nucleus::createTrunc(Value *v, Type *destType)
1209{
1210 return createCast(Ice::InstCast::Trunc, v, destType);
1211}
1212
1213Value *Nucleus::createZExt(Value *v, Type *destType)
1214{
1215 return createCast(Ice::InstCast::Zext, v, destType);
1216}
1217
1218Value *Nucleus::createSExt(Value *v, Type *destType)
1219{
1220 return createCast(Ice::InstCast::Sext, v, destType);
1221}
1222
1223Value *Nucleus::createFPToUI(Value *v, Type *destType)
1224{
1225 return createCast(Ice::InstCast::Fptoui, v, destType);
1226}
1227
1228Value *Nucleus::createFPToSI(Value *v, Type *destType)
1229{
1230 return createCast(Ice::InstCast::Fptosi, v, destType);
1231}
1232
1233Value *Nucleus::createSIToFP(Value *v, Type *destType)
1234{
1235 return createCast(Ice::InstCast::Sitofp, v, destType);
1236}
1237
1238Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1239{
1240 return createCast(Ice::InstCast::Fptrunc, v, destType);
1241}
1242
1243Value *Nucleus::createFPExt(Value *v, Type *destType)
1244{
1245 return createCast(Ice::InstCast::Fpext, v, destType);
1246}
1247
1248Value *Nucleus::createBitCast(Value *v, Type *destType)
1249{
1250 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1251 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1252 // emulate them by writing to the stack and reading back as the destination type.
1253 if(emulateMismatchedBitCast)
1254 {
1255 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1256 {
1257 Value *address = allocateStackVariable(destType);
1258 createStore(v, address, T(v->getType()));
1259 return createLoad(address, destType);
1260 }
1261 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1262 {
1263 Value *address = allocateStackVariable(T(v->getType()));
1264 createStore(v, address, T(v->getType()));
1265 return createLoad(address, destType);
1266 }
1267 }
1268
1269 return createCast(Ice::InstCast::Bitcast, v, destType);
1270}
1271
1272static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1273{
1274 ASSERT(lhs->getType() == rhs->getType());
1275
1276 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1277 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1278 ::basicBlock->appendInst(cmp);
1279
1280 return V(result);
1281}
1282
1283Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1284{
1285 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1286}
1287
1288Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1289{
1290 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1291}
1292
1293Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1294{
1295 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1296}
1297
1298Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1299{
1300 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1301}
1302
1303Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1304{
1305 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1306}
1307
1308Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1309{
1310 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1311}
1312
1313Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1314{
1315 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1316}
1317
1318Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1319{
1320 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1321}
1322
1323Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1324{
1325 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1326}
1327
1328Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1329{
1330 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1331}
1332
1333Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1334{
1335 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1336}
1337
1338static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1339{
1340 ASSERT(lhs->getType() == rhs->getType());
1341 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1342
1343 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1344 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1345 ::basicBlock->appendInst(cmp);
1346
1347 return V(result);
1348}
1349
1350Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1351{
1352 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1353}
1354
1355Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1356{
1357 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1358}
1359
1360Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1361{
1362 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1363}
1364
1365Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1366{
1367 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1368}
1369
1370Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1371{
1372 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1373}
1374
1375Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1376{
1377 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1378}
1379
1380Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1381{
1382 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1383}
1384
1385Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1386{
1387 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1388}
1389
1390Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1391{
1392 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1393}
1394
1395Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1396{
1397 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1398}
1399
1400Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1401{
1402 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1403}
1404
1405Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1406{
1407 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1408}
1409
1410Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1411{
1412 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1413}
1414
1415Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1416{
1417 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1418}
1419
1420Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1421{
1422 auto result = ::function->makeVariable(T(type));
1423 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1424 ::basicBlock->appendInst(extract);
1425
1426 return V(result);
1427}
1428
1429Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1430{
1431 auto result = ::function->makeVariable(vector->getType());
1432 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1433 ::basicBlock->appendInst(insert);
1434
1435 return V(result);
1436}
1437
1438Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1439{
1440 ASSERT(V1->getType() == V2->getType());
1441
1442 int size = Ice::typeNumElements(V1->getType());
1443 auto result = ::function->makeVariable(V1->getType());
1444 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1445
1446 for(int i = 0; i < size; i++)
1447 {
1448 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1449 }
1450
1451 ::basicBlock->appendInst(shuffle);
1452
1453 return V(result);
1454}
1455
1456Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1457{
1458 ASSERT(ifTrue->getType() == ifFalse->getType());
1459
1460 auto result = ::function->makeVariable(ifTrue->getType());
1461 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1462 ::basicBlock->appendInst(select);
1463
1464 return V(result);
1465}
1466
1467SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1468{
1469 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1470 ::basicBlock->appendInst(switchInst);
1471
1472 return reinterpret_cast<SwitchCases*>(switchInst);
1473}
1474
1475void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1476{
1477 switchCases->addBranch(label, label, branch);
1478}
1479
1480void Nucleus::createUnreachable()
1481{
1482 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1483 ::basicBlock->appendInst(unreachable);
1484}
1485
1486Type *Nucleus::getPointerType(Type *ElementType)
1487{
1488 if(sizeof(void*) == 8)
1489 {
1490 return T(Ice::IceType_i64);
1491 }
1492 else
1493 {
1494 return T(Ice::IceType_i32);
1495 }
1496}
1497
1498Value *Nucleus::createNullValue(Type *Ty)
1499{
1500 if(Ice::isVectorType(T(Ty)))
1501 {
1502 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
1503 int64_t c[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1504 return createConstantVector(c, Ty);
1505 }
1506 else
1507 {
1508 return V(::context->getConstantZero(T(Ty)));
1509 }
1510}
1511
1512Value *Nucleus::createConstantLong(int64_t i)
1513{
1514 return V(::context->getConstantInt64(i));
1515}
1516
1517Value *Nucleus::createConstantInt(int i)
1518{
1519 return V(::context->getConstantInt32(i));
1520}
1521
1522Value *Nucleus::createConstantInt(unsigned int i)
1523{
1524 return V(::context->getConstantInt32(i));
1525}
1526
1527Value *Nucleus::createConstantBool(bool b)
1528{
1529 return V(::context->getConstantInt1(b));
1530}
1531
1532Value *Nucleus::createConstantByte(signed char i)
1533{
1534 return V(::context->getConstantInt8(i));
1535}
1536
1537Value *Nucleus::createConstantByte(unsigned char i)
1538{
1539 return V(::context->getConstantInt8(i));
1540}
1541
1542Value *Nucleus::createConstantShort(short i)
1543{
1544 return V(::context->getConstantInt16(i));
1545}
1546
1547Value *Nucleus::createConstantShort(unsigned short i)
1548{
1549 return V(::context->getConstantInt16(i));
1550}
1551
1552Value *Nucleus::createConstantFloat(float x)
1553{
1554 return V(::context->getConstantFloat(x));
1555}
1556
1557Value *Nucleus::createNullPointer(Type *Ty)
1558{
1559 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1560}
1561
1562Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1563{
1564 const int vectorSize = 16;
1565 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
1566 const int alignment = vectorSize;
1567 auto globalPool = ::function->getGlobalPool();
1568
1569 const int64_t *i = constants;
1570 const double *f = reinterpret_cast<const double*>(constants);
1571 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1572
1573 switch((int)reinterpret_cast<intptr_t>(type))
1574 {
1575 case Ice::IceType_v4i32:
1576 case Ice::IceType_v4i1:
1577 {
1578 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1579 static_assert(sizeof(initializer) == vectorSize, "!");
1580 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1581 }
1582 break;
1583 case Ice::IceType_v4f32:
1584 {
1585 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
1586 static_assert(sizeof(initializer) == vectorSize, "!");
1587 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1588 }
1589 break;
1590 case Ice::IceType_v8i16:
1591 case Ice::IceType_v8i1:
1592 {
1593 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
1594 static_assert(sizeof(initializer) == vectorSize, "!");
1595 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1596 }
1597 break;
1598 case Ice::IceType_v16i8:
1599 case Ice::IceType_v16i1:
1600 {
1601 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
1602 static_assert(sizeof(initializer) == vectorSize, "!");
1603 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1604 }
1605 break;
1606 case Type_v2i32:
1607 {
1608 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1609 static_assert(sizeof(initializer) == vectorSize, "!");
1610 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1611 }
1612 break;
1613 case Type_v2f32:
1614 {
1615 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
1616 static_assert(sizeof(initializer) == vectorSize, "!");
1617 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1618 }
1619 break;
1620 case Type_v4i16:
1621 {
1622 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
1623 static_assert(sizeof(initializer) == vectorSize, "!");
1624 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1625 }
1626 break;
1627 case Type_v8i8:
1628 {
1629 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
1630 static_assert(sizeof(initializer) == vectorSize, "!");
1631 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1632 }
1633 break;
1634 case Type_v4i8:
1635 {
1636 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
1637 static_assert(sizeof(initializer) == vectorSize, "!");
1638 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1639 }
1640 break;
1641 default:
1642 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
1643 }
1644
1645 auto name = Ice::GlobalString::createWithoutString(::context);
1646 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1647 variableDeclaration->setName(name);
1648 variableDeclaration->setAlignment(alignment);
1649 variableDeclaration->setIsConstant(true);
1650 variableDeclaration->addInitializer(dataInitializer);
1651
1652 ::function->addGlobal(variableDeclaration);
1653
1654 constexpr int32_t offset = 0;
1655 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1656
1657 Ice::Variable *result = ::function->makeVariable(T(type));
1658 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1659 ::basicBlock->appendInst(load);
1660
1661 return V(result);
1662}
1663
1664Value *Nucleus::createConstantVector(const double *constants, Type *type)
1665{
1666 return createConstantVector((const int64_t*)constants, type);
1667}
1668
1669Type *Void::getType()
1670{
1671 return T(Ice::IceType_void);
1672}
1673
1674Type *Bool::getType()
1675{
1676 return T(Ice::IceType_i1);
1677}
1678
1679Type *Byte::getType()
1680{
1681 return T(Ice::IceType_i8);
1682}
1683
1684Type *SByte::getType()
1685{
1686 return T(Ice::IceType_i8);
1687}
1688
1689Type *Short::getType()
1690{
1691 return T(Ice::IceType_i16);
1692}
1693
1694Type *UShort::getType()
1695{
1696 return T(Ice::IceType_i16);
1697}
1698
1699Type *Byte4::getType()
1700{
1701 return T(Type_v4i8);
1702}
1703
1704Type *SByte4::getType()
1705{
1706 return T(Type_v4i8);
1707}
1708
1709namespace
1710{
1711 RValue<Byte> SaturateUnsigned(RValue<Short> x)
1712 {
1713 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
1714 }
1715
1716 RValue<Byte> Extract(RValue<Byte8> val, int i)
1717 {
1718 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
1719 }
1720
1721 RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
1722 {
1723 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
1724 }
1725}
1726
1727RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
1728{
1729 if(emulateIntrinsics)
1730 {
1731 Byte8 result;
1732 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1733 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1734 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1735 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1736 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1737 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1738 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1739 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
1740
1741 return result;
1742 }
1743 else
1744 {
1745 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1746 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1747 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1748 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1749 paddusb->addArg(x.value);
1750 paddusb->addArg(y.value);
1751 ::basicBlock->appendInst(paddusb);
1752
1753 return RValue<Byte8>(V(result));
1754 }
1755}
1756
1757RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
1758{
1759 if(emulateIntrinsics)
1760 {
1761 Byte8 result;
1762 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1763 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1764 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1765 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1766 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1767 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1768 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1769 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
1770
1771 return result;
1772 }
1773 else
1774 {
1775 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1776 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1777 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1778 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1779 psubusw->addArg(x.value);
1780 psubusw->addArg(y.value);
1781 ::basicBlock->appendInst(psubusw);
1782
1783 return RValue<Byte8>(V(result));
1784 }
1785}
1786
1787RValue<SByte> Extract(RValue<SByte8> val, int i)
1788{
1789 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
1790}
1791
1792RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
1793{
1794 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
1795}
1796
1797RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1798{
1799 if(emulateIntrinsics)
1800 {
1801 SByte8 result;
1802 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
1803 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
1804 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
1805 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
1806 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
1807 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
1808 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
1809 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
1810
1811 return result;
1812 }
1813 else
1814 {
1815 #if defined(__i386__) || defined(__x86_64__)
1816 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
1817 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
1818 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
1819
1820 return As<SByte8>(hi | lo);
1821 #else
1822 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capensbd65da92017-01-05 16:31:06 -05001823 #endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001824 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001825}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001826
Nicolas Capens157ba262019-12-10 17:49:14 -05001827RValue<Int> SignMask(RValue<Byte8> x)
1828{
1829 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001830 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001831 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
1832 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001833 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001834 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01001835 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001836 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
1837 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1838 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1839 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
1840 movmsk->addArg(x.value);
1841 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01001842
Nicolas Capens157ba262019-12-10 17:49:14 -05001843 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01001844 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001845}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001846
1847// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
1848// {
Nicolas Capens2f970b62016-11-08 14:28:59 -05001849// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001850// }
1851
Nicolas Capens157ba262019-12-10 17:49:14 -05001852RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
1853{
1854 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
1855}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001856
Nicolas Capens157ba262019-12-10 17:49:14 -05001857Type *Byte8::getType()
1858{
1859 return T(Type_v8i8);
1860}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001861
Nicolas Capens598f8d82016-09-26 15:09:10 -04001862// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
1863// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001864// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001865// }
1866
1867// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1868// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001869// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001870// }
1871
Nicolas Capens157ba262019-12-10 17:49:14 -05001872RValue<SByte> SaturateSigned(RValue<Short> x)
1873{
1874 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
1875}
1876
1877RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
1878{
1879 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04001880 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001881 SByte8 result;
1882 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1883 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1884 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1885 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1886 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1887 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1888 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1889 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04001890
Nicolas Capens157ba262019-12-10 17:49:14 -05001891 return result;
1892 }
1893 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001894 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001895 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1896 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1897 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1898 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1899 paddsb->addArg(x.value);
1900 paddsb->addArg(y.value);
1901 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001902
Nicolas Capens157ba262019-12-10 17:49:14 -05001903 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001904 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001905}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001906
Nicolas Capens157ba262019-12-10 17:49:14 -05001907RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
1908{
1909 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001910 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001911 SByte8 result;
1912 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1913 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1914 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1915 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1916 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1917 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1918 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1919 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001920
Nicolas Capens157ba262019-12-10 17:49:14 -05001921 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04001922 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001923 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001924 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001925 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1926 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1927 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1928 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1929 psubsb->addArg(x.value);
1930 psubsb->addArg(y.value);
1931 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04001932
Nicolas Capens157ba262019-12-10 17:49:14 -05001933 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001934 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001935}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001936
Nicolas Capens157ba262019-12-10 17:49:14 -05001937RValue<Int> SignMask(RValue<SByte8> x)
1938{
1939 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001940 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001941 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
1942 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001943 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001944 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001945 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001946 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
1947 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1948 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1949 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
1950 movmsk->addArg(x.value);
1951 ::basicBlock->appendInst(movmsk);
1952
1953 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04001954 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001955}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001956
Nicolas Capens157ba262019-12-10 17:49:14 -05001957RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
1958{
1959 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
1960}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001961
Nicolas Capens157ba262019-12-10 17:49:14 -05001962RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
1963{
1964 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
1965}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001966
Nicolas Capens157ba262019-12-10 17:49:14 -05001967Type *SByte8::getType()
1968{
1969 return T(Type_v8i8);
1970}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001971
Nicolas Capens157ba262019-12-10 17:49:14 -05001972Type *Byte16::getType()
1973{
1974 return T(Ice::IceType_v16i8);
1975}
Nicolas Capens16b5f152016-10-13 13:39:01 -04001976
Nicolas Capens157ba262019-12-10 17:49:14 -05001977Type *SByte16::getType()
1978{
1979 return T(Ice::IceType_v16i8);
1980}
Nicolas Capens16b5f152016-10-13 13:39:01 -04001981
Nicolas Capens157ba262019-12-10 17:49:14 -05001982Type *Short2::getType()
1983{
1984 return T(Type_v2i16);
1985}
Nicolas Capensd4227962016-11-09 14:24:25 -05001986
Nicolas Capens157ba262019-12-10 17:49:14 -05001987Type *UShort2::getType()
1988{
1989 return T(Type_v2i16);
1990}
Nicolas Capensd4227962016-11-09 14:24:25 -05001991
Nicolas Capens157ba262019-12-10 17:49:14 -05001992Short4::Short4(RValue<Int4> cast)
1993{
1994 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
1995 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
1996 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
1997
1998 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
1999 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2000
2001 storeValue(short4);
2002}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002003
2004// Short4::Short4(RValue<Float> cast)
2005// {
2006// }
2007
Nicolas Capens157ba262019-12-10 17:49:14 -05002008Short4::Short4(RValue<Float4> cast)
2009{
2010 UNIMPLEMENTED("Short4::Short4(RValue<Float4> cast)");
2011}
2012
2013RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2014{
2015 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002016 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002017 Short4 result;
2018 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2019 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2020 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2021 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002022
2023 return result;
2024 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002025 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002026 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002027 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2028 }
2029}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002030
Nicolas Capens157ba262019-12-10 17:49:14 -05002031RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2032{
2033 if(emulateIntrinsics)
2034 {
2035 Short4 result;
2036 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2037 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2038 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2039 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2040
2041 return result;
2042 }
2043 else
2044 {
2045 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2046 }
2047}
2048
2049RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2050{
2051 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2052 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2053 ::basicBlock->appendInst(cmp);
2054
2055 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2056 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2057 ::basicBlock->appendInst(select);
2058
2059 return RValue<Short4>(V(result));
2060}
2061
2062RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2063{
2064 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2065 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2066 ::basicBlock->appendInst(cmp);
2067
2068 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2069 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2070 ::basicBlock->appendInst(select);
2071
2072 return RValue<Short4>(V(result));
2073}
2074
2075RValue<Short> SaturateSigned(RValue<Int> x)
2076{
2077 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2078}
2079
2080RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2081{
2082 if(emulateIntrinsics)
2083 {
2084 Short4 result;
2085 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2086 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2087 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2088 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2089
2090 return result;
2091 }
2092 else
2093 {
2094 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2095 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2096 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2097 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2098 paddsw->addArg(x.value);
2099 paddsw->addArg(y.value);
2100 ::basicBlock->appendInst(paddsw);
2101
2102 return RValue<Short4>(V(result));
2103 }
2104}
2105
2106RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2107{
2108 if(emulateIntrinsics)
2109 {
2110 Short4 result;
2111 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2112 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2113 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2114 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2115
2116 return result;
2117 }
2118 else
2119 {
2120 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2121 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2122 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2123 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2124 psubsw->addArg(x.value);
2125 psubsw->addArg(y.value);
2126 ::basicBlock->appendInst(psubsw);
2127
2128 return RValue<Short4>(V(result));
2129 }
2130}
2131
2132RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2133{
2134 if(emulateIntrinsics)
2135 {
2136 Short4 result;
2137 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2138 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2139 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2140 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2141
2142 return result;
2143 }
2144 else
2145 {
2146 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2147 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2148 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2149 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2150 pmulhw->addArg(x.value);
2151 pmulhw->addArg(y.value);
2152 ::basicBlock->appendInst(pmulhw);
2153
2154 return RValue<Short4>(V(result));
2155 }
2156}
2157
2158RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2159{
2160 if(emulateIntrinsics)
2161 {
2162 Int2 result;
2163 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2164 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2165
2166 return result;
2167 }
2168 else
2169 {
2170 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2171 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2172 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2173 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2174 pmaddwd->addArg(x.value);
2175 pmaddwd->addArg(y.value);
2176 ::basicBlock->appendInst(pmaddwd);
2177
2178 return As<Int2>(V(result));
2179 }
2180}
2181
2182RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2183{
2184 if(emulateIntrinsics)
2185 {
2186 SByte8 result;
2187 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2188 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2189 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2190 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2191 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2192 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2193 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2194 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2195
2196 return result;
2197 }
2198 else
2199 {
2200 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2201 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2202 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2203 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2204 pack->addArg(x.value);
2205 pack->addArg(y.value);
2206 ::basicBlock->appendInst(pack);
2207
2208 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2209 }
2210}
2211
2212RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2213{
2214 if(emulateIntrinsics)
2215 {
2216 Byte8 result;
2217 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2218 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2219 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2220 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2221 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2222 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2223 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2224 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2225
2226 return result;
2227 }
2228 else
2229 {
2230 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2231 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2232 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2233 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2234 pack->addArg(x.value);
2235 pack->addArg(y.value);
2236 ::basicBlock->appendInst(pack);
2237
2238 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2239 }
2240}
2241
2242RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2243{
2244 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2245}
2246
2247RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2248{
2249 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2250}
2251
2252Type *Short4::getType()
2253{
2254 return T(Type_v4i16);
2255}
2256
2257UShort4::UShort4(RValue<Float4> cast, bool saturate)
2258{
2259 if(saturate)
2260 {
2261 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002262 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002263 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2264 // PackUnsigned takes care of 0x0000 saturation.
2265 Int4 int4(Min(cast, Float4(0xFFFF)));
2266 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002267 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002268 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002269 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002270 // ARM saturates the 32-bit integer result on overflow/undeflow.
2271 Int4 int4(cast);
2272 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002273 }
2274 else
2275 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002276 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002277 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002278 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002279 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002280 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002281 *this = Short4(Int4(cast));
2282 }
2283}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002284
Nicolas Capens157ba262019-12-10 17:49:14 -05002285RValue<UShort> Extract(RValue<UShort4> val, int i)
2286{
2287 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2288}
2289
2290RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2291{
2292 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2293}
2294
2295RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2296{
2297 if(emulateIntrinsics)
2298 {
2299 UShort4 result;
2300 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2301 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2302 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2303 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2304
2305 return result;
2306 }
2307 else
2308 {
2309 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2310 }
2311}
2312
2313RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2314{
2315 if(emulateIntrinsics)
2316 {
2317 UShort4 result;
2318 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2319 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2320 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2321 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2322
2323 return result;
2324 }
2325 else
2326 {
2327 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2328 }
2329}
2330
2331RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2332{
2333 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2334 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2335 ::basicBlock->appendInst(cmp);
2336
2337 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2338 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2339 ::basicBlock->appendInst(select);
2340
2341 return RValue<UShort4>(V(result));
2342}
2343
2344RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2345{
2346 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2347 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2348 ::basicBlock->appendInst(cmp);
2349
2350 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2351 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2352 ::basicBlock->appendInst(select);
2353
2354 return RValue<UShort4>(V(result));
2355}
2356
2357RValue<UShort> SaturateUnsigned(RValue<Int> x)
2358{
2359 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2360}
2361
2362RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2363{
2364 if(emulateIntrinsics)
2365 {
2366 UShort4 result;
2367 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2368 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2369 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2370 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2371
2372 return result;
2373 }
2374 else
2375 {
2376 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2377 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2378 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2379 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2380 paddusw->addArg(x.value);
2381 paddusw->addArg(y.value);
2382 ::basicBlock->appendInst(paddusw);
2383
2384 return RValue<UShort4>(V(result));
2385 }
2386}
2387
2388RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2389{
2390 if(emulateIntrinsics)
2391 {
2392 UShort4 result;
2393 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2394 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2395 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2396 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2397
2398 return result;
2399 }
2400 else
2401 {
2402 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2403 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2404 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2405 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2406 psubusw->addArg(x.value);
2407 psubusw->addArg(y.value);
2408 ::basicBlock->appendInst(psubusw);
2409
2410 return RValue<UShort4>(V(result));
2411 }
2412}
2413
2414RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2415{
2416 if(emulateIntrinsics)
2417 {
2418 UShort4 result;
2419 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2420 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2421 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2422 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2423
2424 return result;
2425 }
2426 else
2427 {
2428 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2429 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2430 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2431 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2432 pmulhuw->addArg(x.value);
2433 pmulhuw->addArg(y.value);
2434 ::basicBlock->appendInst(pmulhuw);
2435
2436 return RValue<UShort4>(V(result));
2437 }
2438}
2439
2440RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2441{
2442 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2443
2444 // Scalarized implementation.
2445 Int4 result;
2446 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2447 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2448 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2449 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2450
2451 return result;
2452}
2453
2454RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2455{
2456 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2457
2458 if(false) // Partial product based implementation.
2459 {
2460 auto xh = x >> 16;
2461 auto yh = y >> 16;
2462 auto xl = x & UInt4(0x0000FFFF);
2463 auto yl = y & UInt4(0x0000FFFF);
2464 auto xlyh = xl * yh;
2465 auto xhyl = xh * yl;
2466 auto xlyhh = xlyh >> 16;
2467 auto xhylh = xhyl >> 16;
2468 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2469 auto xhyll = xhyl & UInt4(0x0000FFFF);
2470 auto xlylh = (xl * yl) >> 16;
2471 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2472
2473 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002474 }
2475
Nicolas Capens157ba262019-12-10 17:49:14 -05002476 // Scalarized implementation.
2477 Int4 result;
2478 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
2479 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
2480 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
2481 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
2482
2483 return As<UInt4>(result);
2484}
2485
2486RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2487{
2488 UNIMPLEMENTED("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
2489 return UShort4(0);
2490}
2491
2492Type *UShort4::getType()
2493{
2494 return T(Type_v4i16);
2495}
2496
2497RValue<Short> Extract(RValue<Short8> val, int i)
2498{
2499 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2500}
2501
2502RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2503{
2504 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2505}
2506
2507RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2508{
2509 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002510 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002511 Short8 result;
2512 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2513 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2514 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2515 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
2516 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
2517 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
2518 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
2519 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002520
Nicolas Capens157ba262019-12-10 17:49:14 -05002521 return result;
2522 }
2523 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002524 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002525 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002526 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002527}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002528
Nicolas Capens157ba262019-12-10 17:49:14 -05002529RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
2530{
2531 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002532 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002533 Short8 result;
2534 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2535 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2536 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2537 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2538 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
2539 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
2540 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
2541 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002542
Nicolas Capens157ba262019-12-10 17:49:14 -05002543 return result;
2544 }
2545 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002546 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002547 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002548 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002549}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002550
Nicolas Capens157ba262019-12-10 17:49:14 -05002551RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
2552{
2553 UNIMPLEMENTED("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
2554 return Int4(0);
2555}
2556
2557RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
2558{
2559 UNIMPLEMENTED("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
2560 return Short8(0);
2561}
2562
2563Type *Short8::getType()
2564{
2565 return T(Ice::IceType_v8i16);
2566}
2567
2568RValue<UShort> Extract(RValue<UShort8> val, int i)
2569{
2570 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2571}
2572
2573RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
2574{
2575 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
2576}
2577
2578RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
2579{
2580 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002581 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002582 UShort8 result;
2583 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2584 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2585 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2586 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2587 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
2588 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
2589 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
2590 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002591
Nicolas Capens157ba262019-12-10 17:49:14 -05002592 return result;
2593 }
2594 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002595 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002596 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002597 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002598}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002599
Nicolas Capens157ba262019-12-10 17:49:14 -05002600RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
2601{
2602 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002603 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002604 UShort8 result;
2605 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2606 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2607 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2608 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2609 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
2610 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
2611 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
2612 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002613
Nicolas Capens157ba262019-12-10 17:49:14 -05002614 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002615 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002616 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002617 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002618 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002619 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002620}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002621
Nicolas Capens157ba262019-12-10 17:49:14 -05002622RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
2623{
2624 UNIMPLEMENTED("RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)");
2625 return UShort8(0);
2626}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002627
Nicolas Capens157ba262019-12-10 17:49:14 -05002628RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
2629{
2630 UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
2631 return UShort8(0);
2632}
2633
2634// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
Nicolas Capens598f8d82016-09-26 15:09:10 -04002635// RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
2636// {
Ben Claytoneb50d252019-04-15 13:50:01 -04002637// ASSERT(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002638// }
2639
Nicolas Capens157ba262019-12-10 17:49:14 -05002640Type *UShort8::getType()
2641{
2642 return T(Ice::IceType_v8i16);
2643}
2644
2645RValue<Int> operator++(Int &val, int) // Post-increment
2646{
2647 RValue<Int> res = val;
2648 val += 1;
2649 return res;
2650}
2651
2652const Int &operator++(Int &val) // Pre-increment
2653{
2654 val += 1;
2655 return val;
2656}
2657
2658RValue<Int> operator--(Int &val, int) // Post-decrement
2659{
2660 RValue<Int> res = val;
2661 val -= 1;
2662 return res;
2663}
2664
2665const Int &operator--(Int &val) // Pre-decrement
2666{
2667 val -= 1;
2668 return val;
2669}
2670
2671RValue<Int> RoundInt(RValue<Float> cast)
2672{
2673 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002674 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002675 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2676 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002677 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002678 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002679 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002680 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2681 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2682 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2683 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2684 nearbyint->addArg(cast.value);
2685 ::basicBlock->appendInst(nearbyint);
2686
2687 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002688 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002689}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002690
Nicolas Capens157ba262019-12-10 17:49:14 -05002691Type *Int::getType()
2692{
2693 return T(Ice::IceType_i32);
2694}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002695
Nicolas Capens157ba262019-12-10 17:49:14 -05002696Type *Long::getType()
2697{
2698 return T(Ice::IceType_i64);
2699}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002700
Nicolas Capens157ba262019-12-10 17:49:14 -05002701UInt::UInt(RValue<Float> cast)
2702{
2703 // Smallest positive value representable in UInt, but not in Int
2704 const unsigned int ustart = 0x80000000u;
2705 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002706
Nicolas Capens157ba262019-12-10 17:49:14 -05002707 // If the value is negative, store 0, otherwise store the result of the conversion
2708 storeValue((~(As<Int>(cast) >> 31) &
2709 // Check if the value can be represented as an Int
2710 IfThenElse(cast >= ustartf,
2711 // If the value is too large, subtract ustart and re-add it after conversion.
2712 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2713 // Otherwise, just convert normally
2714 Int(cast))).value);
2715}
Nicolas Capensa8086512016-11-07 17:32:17 -05002716
Nicolas Capens157ba262019-12-10 17:49:14 -05002717RValue<UInt> operator++(UInt &val, int) // Post-increment
2718{
2719 RValue<UInt> res = val;
2720 val += 1;
2721 return res;
2722}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002723
Nicolas Capens157ba262019-12-10 17:49:14 -05002724const UInt &operator++(UInt &val) // Pre-increment
2725{
2726 val += 1;
2727 return val;
2728}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002729
Nicolas Capens157ba262019-12-10 17:49:14 -05002730RValue<UInt> operator--(UInt &val, int) // Post-decrement
2731{
2732 RValue<UInt> res = val;
2733 val -= 1;
2734 return res;
2735}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002736
Nicolas Capens157ba262019-12-10 17:49:14 -05002737const UInt &operator--(UInt &val) // Pre-decrement
2738{
2739 val -= 1;
2740 return val;
2741}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002742
Nicolas Capens598f8d82016-09-26 15:09:10 -04002743// RValue<UInt> RoundUInt(RValue<Float> cast)
2744// {
Ben Claytoneb50d252019-04-15 13:50:01 -04002745// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002746// }
2747
Nicolas Capens157ba262019-12-10 17:49:14 -05002748Type *UInt::getType()
2749{
2750 return T(Ice::IceType_i32);
2751}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002752
2753// Int2::Int2(RValue<Int> cast)
2754// {
2755// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2756// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
2757//
2758// Constant *shuffle[2];
2759// shuffle[0] = Nucleus::createConstantInt(0);
2760// shuffle[1] = Nucleus::createConstantInt(0);
2761//
2762// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
2763//
2764// storeValue(replicate);
2765// }
2766
Nicolas Capens157ba262019-12-10 17:49:14 -05002767RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
2768{
2769 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002770 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002771 Int2 result;
2772 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2773 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002774
Nicolas Capens157ba262019-12-10 17:49:14 -05002775 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002776 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002777 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002778 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002779 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002780 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002781}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002782
Nicolas Capens157ba262019-12-10 17:49:14 -05002783RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
2784{
2785 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002786 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002787 Int2 result;
2788 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2789 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2790
2791 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002792 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002793 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002794 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002795 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002796 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002797}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002798
Nicolas Capens157ba262019-12-10 17:49:14 -05002799Type *Int2::getType()
2800{
2801 return T(Type_v2i32);
2802}
2803
2804RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
2805{
2806 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002807 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002808 UInt2 result;
2809 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
2810 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002811
Nicolas Capens157ba262019-12-10 17:49:14 -05002812 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002813 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002814 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002815 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002816 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002817 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002818}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002819
Nicolas Capens157ba262019-12-10 17:49:14 -05002820RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
2821{
2822 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002823 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002824 UInt2 result;
2825 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
2826 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05002827
Nicolas Capens157ba262019-12-10 17:49:14 -05002828 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002829 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002830 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002831 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002832 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002833 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002834}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002835
Nicolas Capens157ba262019-12-10 17:49:14 -05002836Type *UInt2::getType()
2837{
2838 return T(Type_v2i32);
2839}
2840
2841Int4::Int4(RValue<Byte4> cast) : XYZW(this)
2842{
2843 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2844 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2845
2846 Value *e;
2847 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
2848 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2849 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
2850
2851 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2852 Value *d = Nucleus::createBitCast(c, Short8::getType());
2853 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
2854
2855 Value *f = Nucleus::createBitCast(e, Int4::getType());
2856 storeValue(f);
2857}
2858
2859Int4::Int4(RValue<SByte4> cast) : XYZW(this)
2860{
2861 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2862 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2863
2864 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2865 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2866 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
2867
2868 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
2869 Value *d = Nucleus::createBitCast(c, Short8::getType());
2870 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
2871
2872 *this = As<Int4>(e) >> 24;
2873}
2874
2875Int4::Int4(RValue<Short4> cast) : XYZW(this)
2876{
2877 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
2878 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2879
2880 *this = As<Int4>(c) >> 16;
2881}
2882
2883Int4::Int4(RValue<UShort4> cast) : XYZW(this)
2884{
2885 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2886 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2887 Value *d = Nucleus::createBitCast(c, Int4::getType());
2888 storeValue(d);
2889}
2890
2891Int4::Int4(RValue<Int> rhs) : XYZW(this)
2892{
2893 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
2894
2895 int swizzle[4] = {0, 0, 0, 0};
2896 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
2897
2898 storeValue(replicate);
2899}
2900
2901RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
2902{
2903 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002904 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002905 Int4 result;
2906 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2907 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
2908 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
2909 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002910
Nicolas Capens157ba262019-12-10 17:49:14 -05002911 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002912 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002913 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002914 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002915 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002916 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002917}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002918
Nicolas Capens157ba262019-12-10 17:49:14 -05002919RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
2920{
2921 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002922 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002923 Int4 result;
2924 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2925 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2926 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
2927 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05002928
Nicolas Capens157ba262019-12-10 17:49:14 -05002929 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002930 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002931 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002932 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002933 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002934 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002935}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002936
Nicolas Capens157ba262019-12-10 17:49:14 -05002937RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2938{
2939 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
2940}
2941
2942RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2943{
2944 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
2945}
2946
2947RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2948{
2949 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
2950}
2951
2952RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2953{
2954 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
2955}
2956
2957RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2958{
2959 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
2960}
2961
2962RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2963{
2964 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
2965}
2966
2967RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2968{
2969 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
2970 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2971 ::basicBlock->appendInst(cmp);
2972
2973 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2974 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2975 ::basicBlock->appendInst(select);
2976
2977 return RValue<Int4>(V(result));
2978}
2979
2980RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2981{
2982 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
2983 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2984 ::basicBlock->appendInst(cmp);
2985
2986 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2987 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2988 ::basicBlock->appendInst(select);
2989
2990 return RValue<Int4>(V(result));
2991}
2992
2993RValue<Int4> RoundInt(RValue<Float4> cast)
2994{
2995 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002996 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002997 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2998 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002999 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003000 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003001 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003002 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05003003 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3004 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3005 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3006 nearbyint->addArg(cast.value);
3007 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003008
3009 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003010 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003011}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003012
Nicolas Capens157ba262019-12-10 17:49:14 -05003013RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3014{
3015 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003016 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003017 Short8 result;
3018 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3019 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3020 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3021 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3022 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3023 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3024 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3025 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003026
Nicolas Capens157ba262019-12-10 17:49:14 -05003027 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003028 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003029 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003030 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003031 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3032 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3033 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3034 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3035 pack->addArg(x.value);
3036 pack->addArg(y.value);
3037 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003038
Nicolas Capens157ba262019-12-10 17:49:14 -05003039 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003040 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003041}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003042
Nicolas Capens157ba262019-12-10 17:49:14 -05003043RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3044{
3045 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003046 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003047 RValue<Int4> sx = As<Int4>(x);
3048 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003049
Nicolas Capens157ba262019-12-10 17:49:14 -05003050 RValue<Int4> sy = As<Int4>(y);
3051 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003052
Nicolas Capens157ba262019-12-10 17:49:14 -05003053 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003054 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003055 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003056 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003057 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3058 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3059 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3060 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3061 pack->addArg(x.value);
3062 pack->addArg(y.value);
3063 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003064
Nicolas Capens157ba262019-12-10 17:49:14 -05003065 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003066 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003067}
Nicolas Capens33438a62017-09-27 11:47:35 -04003068
Nicolas Capens157ba262019-12-10 17:49:14 -05003069RValue<Int> SignMask(RValue<Int4> x)
3070{
3071 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003072 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003073 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3074 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003075 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003076 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003077 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003078 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
3079 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3080 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3081 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3082 movmsk->addArg(x.value);
3083 ::basicBlock->appendInst(movmsk);
3084
3085 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003086 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003087}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003088
Nicolas Capens157ba262019-12-10 17:49:14 -05003089Type *Int4::getType()
3090{
3091 return T(Ice::IceType_v4i32);
3092}
3093
3094UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
3095{
3096 // Smallest positive value representable in UInt, but not in Int
3097 const unsigned int ustart = 0x80000000u;
3098 const float ustartf = float(ustart);
3099
3100 // Check if the value can be represented as an Int
3101 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3102 // If the value is too large, subtract ustart and re-add it after conversion.
3103 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
3104 // Otherwise, just convert normally
3105 (~uiValue & Int4(cast));
3106 // If the value is negative, store 0, otherwise store the result of the conversion
3107 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3108}
3109
3110UInt4::UInt4(RValue<UInt> rhs) : XYZW(this)
3111{
3112 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3113
3114 int swizzle[4] = {0, 0, 0, 0};
3115 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3116
3117 storeValue(replicate);
3118}
3119
3120RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3121{
3122 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003123 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003124 UInt4 result;
3125 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3126 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3127 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3128 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003129
Nicolas Capens157ba262019-12-10 17:49:14 -05003130 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003131 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003132 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003133 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003134 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003135 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003136}
Ben Clayton88816fa2019-05-15 17:08:14 +01003137
Nicolas Capens157ba262019-12-10 17:49:14 -05003138RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3139{
3140 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003141 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003142 UInt4 result;
3143 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3144 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3145 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3146 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003147
Nicolas Capens157ba262019-12-10 17:49:14 -05003148 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003149 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003150 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003151 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003152 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003153 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003154}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003155
Nicolas Capens157ba262019-12-10 17:49:14 -05003156RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3157{
3158 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3159}
3160
3161RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3162{
3163 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3164}
3165
3166RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3167{
3168 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3169}
3170
3171RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3172{
3173 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3174}
3175
3176RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3177{
3178 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3179}
3180
3181RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3182{
3183 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3184}
3185
3186RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3187{
3188 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3189 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3190 ::basicBlock->appendInst(cmp);
3191
3192 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3193 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3194 ::basicBlock->appendInst(select);
3195
3196 return RValue<UInt4>(V(result));
3197}
3198
3199RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3200{
3201 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3202 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3203 ::basicBlock->appendInst(cmp);
3204
3205 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3206 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3207 ::basicBlock->appendInst(select);
3208
3209 return RValue<UInt4>(V(result));
3210}
3211
3212Type *UInt4::getType()
3213{
3214 return T(Ice::IceType_v4i32);
3215}
3216
3217Type *Half::getType()
3218{
3219 return T(Ice::IceType_i16);
3220}
3221
3222RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3223{
3224 return 1.0f / x;
3225}
3226
3227RValue<Float> RcpSqrt_pp(RValue<Float> x)
3228{
3229 return Rcp_pp(Sqrt(x));
3230}
3231
3232RValue<Float> Sqrt(RValue<Float> x)
3233{
3234 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
3235 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3236 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3237 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3238 sqrt->addArg(x.value);
3239 ::basicBlock->appendInst(sqrt);
3240
3241 return RValue<Float>(V(result));
3242}
3243
3244RValue<Float> Round(RValue<Float> x)
3245{
3246 return Float4(Round(Float4(x))).x;
3247}
3248
3249RValue<Float> Trunc(RValue<Float> x)
3250{
3251 return Float4(Trunc(Float4(x))).x;
3252}
3253
3254RValue<Float> Frac(RValue<Float> x)
3255{
3256 return Float4(Frac(Float4(x))).x;
3257}
3258
3259RValue<Float> Floor(RValue<Float> x)
3260{
3261 return Float4(Floor(Float4(x))).x;
3262}
3263
3264RValue<Float> Ceil(RValue<Float> x)
3265{
3266 return Float4(Ceil(Float4(x))).x;
3267}
3268
3269Type *Float::getType()
3270{
3271 return T(Ice::IceType_f32);
3272}
3273
3274Type *Float2::getType()
3275{
3276 return T(Type_v2f32);
3277}
3278
3279Float4::Float4(RValue<Float> rhs) : XYZW(this)
3280{
3281 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3282
3283 int swizzle[4] = {0, 0, 0, 0};
3284 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3285
3286 storeValue(replicate);
3287}
3288
3289RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3290{
3291 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3292 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3293 ::basicBlock->appendInst(cmp);
3294
3295 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3296 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3297 ::basicBlock->appendInst(select);
3298
3299 return RValue<Float4>(V(result));
3300}
3301
3302RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3303{
3304 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3305 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3306 ::basicBlock->appendInst(cmp);
3307
3308 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3309 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3310 ::basicBlock->appendInst(select);
3311
3312 return RValue<Float4>(V(result));
3313}
3314
3315RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3316{
3317 return Float4(1.0f) / x;
3318}
3319
3320RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3321{
3322 return Rcp_pp(Sqrt(x));
3323}
3324
3325RValue<Float4> Sqrt(RValue<Float4> x)
3326{
3327 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003328 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003329 Float4 result;
3330 result.x = Sqrt(Float(Float4(x).x));
3331 result.y = Sqrt(Float(Float4(x).y));
3332 result.z = Sqrt(Float(Float4(x).z));
3333 result.w = Sqrt(Float(Float4(x).w));
3334
3335 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003336 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003337 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003338 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003339 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensd52e9362016-10-31 23:23:15 -04003340 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3341 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3342 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3343 sqrt->addArg(x.value);
3344 ::basicBlock->appendInst(sqrt);
3345
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003346 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003347 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003348}
Nicolas Capens157ba262019-12-10 17:49:14 -05003349
3350RValue<Int> SignMask(RValue<Float4> x)
3351{
3352 if(emulateIntrinsics || CPUID::ARM)
3353 {
3354 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3355 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3356 }
3357 else
3358 {
3359 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
3360 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3361 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3362 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3363 movmsk->addArg(x.value);
3364 ::basicBlock->appendInst(movmsk);
3365
3366 return RValue<Int>(V(result));
3367 }
3368}
3369
3370RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3371{
3372 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3373}
3374
3375RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3376{
3377 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3378}
3379
3380RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3381{
3382 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3383}
3384
3385RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3386{
3387 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3388}
3389
3390RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3391{
3392 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3393}
3394
3395RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3396{
3397 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
3398}
3399
3400RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3401{
3402 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3403}
3404
3405RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3406{
3407 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3408}
3409
3410RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3411{
3412 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3413}
3414
3415RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3416{
3417 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3418}
3419
3420RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3421{
3422 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3423}
3424
3425RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3426{
3427 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3428}
3429
3430RValue<Float4> Round(RValue<Float4> x)
3431{
3432 if(emulateIntrinsics || CPUID::ARM)
3433 {
3434 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3435 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
3436 }
3437 else if(CPUID::SSE4_1)
3438 {
3439 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3440 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3441 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3442 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3443 round->addArg(x.value);
3444 round->addArg(::context->getConstantInt32(0));
3445 ::basicBlock->appendInst(round);
3446
3447 return RValue<Float4>(V(result));
3448 }
3449 else
3450 {
3451 return Float4(RoundInt(x));
3452 }
3453}
3454
3455RValue<Float4> Trunc(RValue<Float4> x)
3456{
3457 if(CPUID::SSE4_1)
3458 {
3459 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3460 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3461 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3462 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3463 round->addArg(x.value);
3464 round->addArg(::context->getConstantInt32(3));
3465 ::basicBlock->appendInst(round);
3466
3467 return RValue<Float4>(V(result));
3468 }
3469 else
3470 {
3471 return Float4(Int4(x));
3472 }
3473}
3474
3475RValue<Float4> Frac(RValue<Float4> x)
3476{
3477 Float4 frc;
3478
3479 if(CPUID::SSE4_1)
3480 {
3481 frc = x - Floor(x);
3482 }
3483 else
3484 {
3485 frc = x - Float4(Int4(x)); // Signed fractional part.
3486
3487 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
3488 }
3489
3490 // x - floor(x) can be 1.0 for very small negative x.
3491 // Clamp against the value just below 1.0.
3492 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
3493}
3494
3495RValue<Float4> Floor(RValue<Float4> x)
3496{
3497 if(CPUID::SSE4_1)
3498 {
3499 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3500 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3501 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3502 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3503 round->addArg(x.value);
3504 round->addArg(::context->getConstantInt32(1));
3505 ::basicBlock->appendInst(round);
3506
3507 return RValue<Float4>(V(result));
3508 }
3509 else
3510 {
3511 return x - Frac(x);
3512 }
3513}
3514
3515RValue<Float4> Ceil(RValue<Float4> x)
3516{
3517 if(CPUID::SSE4_1)
3518 {
3519 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3520 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3521 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3522 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3523 round->addArg(x.value);
3524 round->addArg(::context->getConstantInt32(2));
3525 ::basicBlock->appendInst(round);
3526
3527 return RValue<Float4>(V(result));
3528 }
3529 else
3530 {
3531 return -Floor(-x);
3532 }
3533}
3534
3535Type *Float4::getType()
3536{
3537 return T(Ice::IceType_v4f32);
3538}
3539
3540RValue<Long> Ticks()
3541{
3542 UNIMPLEMENTED("RValue<Long> Ticks()");
3543 return Long(Int(0));
3544}
3545
3546RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3547{
3548 if (sizeof(void*) == 8)
3549 {
3550 return RValue<Pointer<Byte>>(V(::context->getConstantInt64(reinterpret_cast<intptr_t>(ptr))));
3551 }
3552 else
3553 {
3554 return RValue<Pointer<Byte>>(V(::context->getConstantInt32(reinterpret_cast<intptr_t>(ptr))));
3555 }
3556}
3557
3558RValue<Pointer<Byte>> ConstantData(void const * data, size_t size)
3559{
3560 // TODO: Try to use Ice::VariableDeclaration::DataInitializer and
3561 // getConstantSym instead of tagging data on the routine.
3562 return ConstantPointer(::routine->addConstantData(data, size));
3563}
3564
3565Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3566{
3567 Ice::Variable *ret = nullptr;
3568 if (retTy != nullptr)
3569 {
3570 ret = ::function->makeVariable(T(retTy));
3571 }
3572 auto call = Ice::InstCall::create(::function, args.size(), ret, V(fptr.value), false);
3573 for (auto arg : args)
3574 {
3575 call->addArg(V(arg));
3576 }
3577 ::basicBlock->appendInst(call);
3578 return V(ret);
3579}
3580
3581void Breakpoint()
3582{
3583 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3584 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3585 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3586 ::basicBlock->appendInst(trap);
3587}
3588
3589void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); }
3590Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createMaskedLoad()"); return nullptr; }
3591void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createMaskedStore()"); }
3592
3593RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3594{
3595 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3596}
3597
3598RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3599{
3600 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3601}
3602
3603void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3604{
3605 return emulated::Scatter(base, val, offsets, mask, alignment);
3606}
3607
3608void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3609{
3610 return emulated::Scatter(base, val, offsets, mask, alignment);
3611}
3612
3613RValue<Float> Exp2(RValue<Float> x)
3614{
3615 return emulated::Exp2(x);
3616}
3617
3618RValue<Float> Log2(RValue<Float> x)
3619{
3620 return emulated::Log2(x);
3621}
3622
3623RValue<Float4> Sin(RValue<Float4> x)
3624{
3625 return emulated::Sin(x);
3626}
3627
3628RValue<Float4> Cos(RValue<Float4> x)
3629{
3630 return emulated::Cos(x);
3631}
3632
3633RValue<Float4> Tan(RValue<Float4> x)
3634{
3635 return emulated::Tan(x);
3636}
3637
3638RValue<Float4> Asin(RValue<Float4> x)
3639{
3640 return emulated::Asin(x);
3641}
3642
3643RValue<Float4> Acos(RValue<Float4> x)
3644{
3645 return emulated::Acos(x);
3646}
3647
3648RValue<Float4> Atan(RValue<Float4> x)
3649{
3650 return emulated::Atan(x);
3651}
3652
3653RValue<Float4> Sinh(RValue<Float4> x)
3654{
3655 return emulated::Sinh(x);
3656}
3657
3658RValue<Float4> Cosh(RValue<Float4> x)
3659{
3660 return emulated::Cosh(x);
3661}
3662
3663RValue<Float4> Tanh(RValue<Float4> x)
3664{
3665 return emulated::Tanh(x);
3666}
3667
3668RValue<Float4> Asinh(RValue<Float4> x)
3669{
3670 return emulated::Asinh(x);
3671}
3672
3673RValue<Float4> Acosh(RValue<Float4> x)
3674{
3675 return emulated::Acosh(x);
3676}
3677
3678RValue<Float4> Atanh(RValue<Float4> x)
3679{
3680 return emulated::Atanh(x);
3681}
3682
3683RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3684{
3685 return emulated::Atan2(x, y);
3686}
3687
3688RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3689{
3690 return emulated::Pow(x, y);
3691}
3692
3693RValue<Float4> Exp(RValue<Float4> x)
3694{
3695 return emulated::Exp(x);
3696}
3697
3698RValue<Float4> Log(RValue<Float4> x)
3699{
3700 return emulated::Log(x);
3701}
3702
3703RValue<Float4> Exp2(RValue<Float4> x)
3704{
3705 return emulated::Exp2(x);
3706}
3707
3708RValue<Float4> Log2(RValue<Float4> x)
3709{
3710 return emulated::Log2(x);
3711}
3712
3713RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
3714{
3715 if (emulateIntrinsics)
3716 {
3717 UNIMPLEMENTED("Subzero Ctlz()"); return UInt(0);
3718 }
3719 else
3720 {
3721 Ice::Variable* result = ::function->makeVariable(Ice::IceType_i32);
3722 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3723 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3724 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3725 ctlz->addArg(x.value);
3726 ::basicBlock->appendInst(ctlz);
3727
3728 return RValue<UInt>(V(result));
3729 }
3730}
3731
3732RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
3733{
3734 if (emulateIntrinsics)
3735 {
3736 UNIMPLEMENTED("Subzero Ctlz()"); return UInt4(0);
3737 }
3738 else
3739 {
3740 // TODO: implement vectorized version in Subzero
3741 UInt4 result;
3742 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
3743 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
3744 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
3745 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
3746 return result;
3747 }
3748}
3749
3750RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
3751{
3752 if (emulateIntrinsics)
3753 {
3754 UNIMPLEMENTED("Subzero Cttz()"); return UInt(0);
3755 }
3756 else
3757 {
3758 Ice::Variable* result = ::function->makeVariable(Ice::IceType_i32);
3759 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3760 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3761 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3762 ctlz->addArg(x.value);
3763 ::basicBlock->appendInst(ctlz);
3764
3765 return RValue<UInt>(V(result));
3766 }
3767}
3768
3769RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
3770{
3771 if (emulateIntrinsics)
3772 {
3773 UNIMPLEMENTED("Subzero Cttz()"); return UInt4(0);
3774 }
3775 else
3776 {
3777 // TODO: implement vectorized version in Subzero
3778 UInt4 result;
3779 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
3780 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
3781 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
3782 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
3783 return result;
3784 }
3785}
3786
// Intentionally empty in this backend.
void EmitDebugLocation()
{
}
3788void EmitDebugVariable(Value* value) {}
// Intentionally empty in this backend.
void FlushDebug()
{
}
3790
3791void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params)
3792{
3793 // Subzero currently only supports coroutines as functions (i.e. that do not yield)
3794 createFunction(YieldType, Params);
3795}
3796
3797static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void* yieldValue) { return false; }
3798static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle) {}
3799
3800std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
3801{
3802 // acquireRoutine sets the CoroutineEntryBegin entry
3803 auto coroutineEntry = acquireRoutine(name, cfgEdit);
3804
3805 // For now, set the await and destroy entries to stubs, until we add proper coroutine support to the Subzero backend
3806 auto routine = std::static_pointer_cast<ELFMemoryStreamer>(coroutineEntry);
3807 routine->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void*>(&coroutineEntryAwaitStub));
3808 routine->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void*>(&coroutineEntryDestroyStub));
3809
3810 return coroutineEntry;
3811}
3812
3813void Nucleus::yield(Value* val) { UNIMPLEMENTED("Yield"); }
3814
3815} // namespace rr