1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Reactor.hpp"
16#include "Debug.hpp"
17
18#include "Optimizer.hpp"
19#include "ExecutableMemory.hpp"
20
21#include "src/IceTypes.h"
22#include "src/IceCfg.h"
23#include "src/IceELFStreamer.h"
24#include "src/IceGlobalContext.h"
25#include "src/IceCfgNode.h"
26#include "src/IceELFObjectWriter.h"
27#include "src/IceGlobalInits.h"
28
29#include "llvm/Support/FileSystem.h"
30#include "llvm/Support/raw_os_ostream.h"
31#include "llvm/Support/Compiler.h"
32
33#if __has_feature(memory_sanitizer)
34#include <sanitizer/msan_interface.h>
35#endif
36
37#if defined(_WIN32)
38#ifndef WIN32_LEAN_AND_MEAN
39#define WIN32_LEAN_AND_MEAN
40#endif // !WIN32_LEAN_AND_MEAN
41#ifndef NOMINMAX
42#define NOMINMAX
43#endif // !NOMINMAX
44#include <Windows.h>
45#else
46#include <sys/mman.h>
47#if !defined(MAP_ANONYMOUS)
48#define MAP_ANONYMOUS MAP_ANON
49#endif
50#endif
51
52#include <mutex>
53#include <limits>
54#include <iostream>
55
namespace rr
{
	// Forward declaration: the file-local state declared below keeps a pointer
	// to the streamer before the class itself is defined.
	class ELFMemoryStreamer;
}
60
61namespace
62{
63 // Default configuration settings. Must be accessed under mutex lock.
64 std::mutex defaultConfigLock;
65 rr::Config &defaultConfig()
66 {
67 // This uses a static in a function to avoid the cost of a global static
68 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
69 static rr::Config config = rr::Config::Edit()
70 .set(rr::Optimization::Level::Default)
71 .apply({});
72 return config;
73 }
74
75 Ice::GlobalContext *context = nullptr;
76 Ice::Cfg *function = nullptr;
77 Ice::CfgNode *basicBlock = nullptr;
78 Ice::CfgLocalAllocatorScope *allocator = nullptr;
79 rr::ELFMemoryStreamer *routine = nullptr;
80
81 std::mutex codegenMutex;
82
83 Ice::ELFFileStreamer *elfFile = nullptr;
84 Ice::Fdstream *out = nullptr;
85}
86
87namespace
88{
89 #if !defined(__i386__) && defined(_M_IX86)
90 #define __i386__ 1
91 #endif
92
93 #if !defined(__x86_64__) && (defined(_M_AMD64) || defined (_M_X64))
94 #define __x86_64__ 1
95 #endif
96
97 static Ice::OptLevel toIce(rr::Optimization::Level level)
98 {
99 switch (level)
100 {
101 case rr::Optimization::Level::None: return Ice::Opt_0;
102 case rr::Optimization::Level::Less: return Ice::Opt_1;
103 case rr::Optimization::Level::Default: return Ice::Opt_2;
104 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
105 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
106 }
107 return Ice::Opt_2;
108 }
109
110 class CPUID
111 {
112 public:
113 const static bool ARM;
114 const static bool SSE4_1;
115
116 private:
117 static void cpuid(int registers[4], int info)
118 {
119 #if defined(__i386__) || defined(__x86_64__)
120 #if defined(_WIN32)
121 __cpuid(registers, info);
122 #else
123 __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
124 #endif
125 #else
126 registers[0] = 0;
127 registers[1] = 0;
128 registers[2] = 0;
129 registers[3] = 0;
130 #endif
131 }
132
133 static bool detectARM()
134 {
135 #if defined(__arm__) || defined(__aarch64__)
136 return true;
137 #elif defined(__i386__) || defined(__x86_64__)
138 return false;
139 #elif defined(__mips__)
140 return false;
141 #else
142 #error "Unknown architecture"
143 #endif
144 }
145
146 static bool detectSSE4_1()
147 {
148 #if defined(__i386__) || defined(__x86_64__)
149 int registers[4];
150 cpuid(registers, 1);
151 return (registers[2] & 0x00080000) != 0;
152 #else
153 return false;
154 #endif
155 }
156 };
157
158 const bool CPUID::ARM = CPUID::detectARM();
159 const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
160 const bool emulateIntrinsics = false;
161 const bool emulateMismatchedBitCast = CPUID::ARM;
162}
163
164namespace rr
165{
166 const Capabilities Caps =
167 {
168 false, // CoroutinesSupported
169 };
170
171 enum EmulatedType
172 {
173 EmulatedShift = 16,
174 EmulatedV2 = 2 << EmulatedShift,
175 EmulatedV4 = 4 << EmulatedShift,
176 EmulatedV8 = 8 << EmulatedShift,
177 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
178
179 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
180 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
181 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
182 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
183 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
184 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
185 };
186
187 class Value : public Ice::Operand {};
188 class SwitchCases : public Ice::InstSwitch {};
189 class BasicBlock : public Ice::CfgNode {};
190
191 Ice::Type T(Type *t)
192 {
193 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
194 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
195 }
196
197 Type *T(Ice::Type t)
198 {
199 return reinterpret_cast<Type*>(t);
200 }
201
202 Type *T(EmulatedType t)
203 {
204 return reinterpret_cast<Type*>(t);
205 }
206
207 Value *V(Ice::Operand *v)
208 {
209 return reinterpret_cast<Value*>(v);
210 }
211
212 BasicBlock *B(Ice::CfgNode *b)
213 {
214 return reinterpret_cast<BasicBlock*>(b);
215 }
216
217 static size_t typeSize(Type *type)
218 {
219 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
220 {
221 switch(reinterpret_cast<std::intptr_t>(type))
222 {
223 case Type_v2i32: return 8;
224 case Type_v4i16: return 8;
225 case Type_v2i16: return 4;
226 case Type_v8i8: return 8;
227 case Type_v4i8: return 4;
228 case Type_v2f32: return 8;
229 default: ASSERT(false);
230 }
231 }
232
233 return Ice::typeWidthInBytes(T(type));
234 }
235
236 using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
237 using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
238
239 inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
240 {
241 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
242 }
243
244 inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
245 {
246 return &sectionHeader(elfHeader)[index];
247 }
248
249 static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
250 {
251 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
252
253 uint32_t index = relocation.getSymbol();
254 int table = relocationTable.sh_link;
255 void *symbolValue = nullptr;
256
257 if(index != SHN_UNDEF)
258 {
259 if(table == SHN_UNDEF) return nullptr;
260 const SectionHeader *symbolTable = elfSection(elfHeader, table);
261
262 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
263 if(index >= symtab_entries)
264 {
265 ASSERT(index < symtab_entries && "Symbol Index out of range");
266 return nullptr;
267 }
268
269 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
270 Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
271 uint16_t section = symbol.st_shndx;
272
273 if(section != SHN_UNDEF && section < SHN_LORESERVE)
274 {
275 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
276 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
277 }
278 else
279 {
280 return nullptr;
281 }
282 }
283
284 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
285 unaligned_ptr<int32_t> patchSite = (int32_t*)(address + relocation.r_offset);
286
287 if(CPUID::ARM)
288 {
289 switch(relocation.getType())
290 {
291 case R_ARM_NONE:
292 // No relocation
293 break;
294 case R_ARM_MOVW_ABS_NC:
295 {
296 uint32_t thumb = 0; // Calls to Thumb code not supported.
297 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
298 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
299 }
300 break;
301 case R_ARM_MOVT_ABS:
302 {
303 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
304 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
305 }
306 break;
307 default:
308 ASSERT(false && "Unsupported relocation type");
309 return nullptr;
310 }
311 }
312 else
313 {
314 switch(relocation.getType())
315 {
316 case R_386_NONE:
317 // No relocation
318 break;
319 case R_386_32:
320 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
321 break;
322 case R_386_PC32:
323 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
324 break;
325 default:
326 ASSERT(false && "Unsupported relocation type");
327 return nullptr;
328 }
329 }
330
331 return symbolValue;
332 }
333
334 static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
335 {
336 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
337
338 uint32_t index = relocation.getSymbol();
339 int table = relocationTable.sh_link;
340 void *symbolValue = nullptr;
341
342 if(index != SHN_UNDEF)
343 {
344 if(table == SHN_UNDEF) return nullptr;
345 const SectionHeader *symbolTable = elfSection(elfHeader, table);
346
347 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
348 if(index >= symtab_entries)
349 {
350 ASSERT(index < symtab_entries && "Symbol Index out of range");
351 return nullptr;
352 }
353
354 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
355 Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
356 uint16_t section = symbol.st_shndx;
357
358 if(section != SHN_UNDEF && section < SHN_LORESERVE)
359 {
360 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
361 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
362 }
363 else
364 {
365 return nullptr;
366 }
367 }
368
369 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
370 unaligned_ptr<int32_t> patchSite32 = (int32_t*)(address + relocation.r_offset);
371 unaligned_ptr<int64_t> patchSite64 = (int64_t*)(address + relocation.r_offset);
372
373 switch(relocation.getType())
374 {
375 case R_X86_64_NONE:
376 // No relocation
377 break;
378 case R_X86_64_64:
379 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
380 break;
381 case R_X86_64_PC32:
382 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
383 break;
384 case R_X86_64_32S:
385 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
386 break;
387 default:
388 ASSERT(false && "Unsupported relocation type");
389 return nullptr;
390 }
391
392 return symbolValue;
393 }
394
395 void *loadImage(uint8_t *const elfImage, size_t &codeSize)
396 {
397 ElfHeader *elfHeader = (ElfHeader*)elfImage;
398
399 if(!elfHeader->checkMagic())
400 {
401 return nullptr;
402 }
403
404 // Expect ELF bitness to match platform
405 ASSERT(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
406 #if defined(__i386__)
407 ASSERT(sizeof(void*) == 4 && elfHeader->e_machine == EM_386);
408 #elif defined(__x86_64__)
409 ASSERT(sizeof(void*) == 8 && elfHeader->e_machine == EM_X86_64);
410 #elif defined(__arm__)
411 ASSERT(sizeof(void*) == 4 && elfHeader->e_machine == EM_ARM);
412 #elif defined(__aarch64__)
413 ASSERT(sizeof(void*) == 8 && elfHeader->e_machine == EM_AARCH64);
414 #elif defined(__mips__)
415 ASSERT(sizeof(void*) == 4 && elfHeader->e_machine == EM_MIPS);
416 #else
417 #error "Unsupported platform"
418 #endif
419
420 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
421 void *entry = nullptr;
422
423 for(int i = 0; i < elfHeader->e_shnum; i++)
424 {
425 if(sectionHeader[i].sh_type == SHT_PROGBITS)
426 {
427 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
428 {
429 entry = elfImage + sectionHeader[i].sh_offset;
430 codeSize = sectionHeader[i].sh_size;
431 }
432 }
433 else if(sectionHeader[i].sh_type == SHT_REL)
434 {
435 ASSERT(sizeof(void*) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
436
437 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
438 {
439 const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
440 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
441 }
442 }
443 else if(sectionHeader[i].sh_type == SHT_RELA)
444 {
445 ASSERT(sizeof(void*) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
446
447 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
448 {
449 const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
450 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
451 }
452 }
453 }
454
455 return entry;
456 }
457
458 template<typename T>
459 struct ExecutableAllocator
460 {
461 ExecutableAllocator() {}
462 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {}
463
464 using value_type = T;
465 using size_type = std::size_t;
466
467 T *allocate(size_type n)
468 {
469 return (T*)allocateExecutable(sizeof(T) * n);
470 }
471
472 void deallocate(T *p, size_type n)
473 {
474 deallocateExecutable(p, sizeof(T) * n);
475 }
476 };
477
478 class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
479 {
480 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
481 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
482
483 public:
484 ELFMemoryStreamer() : Routine(), entry(nullptr)
485 {
486 position = 0;
487 buffer.reserve(0x1000);
488 }
489
490 ~ELFMemoryStreamer() override
491 {
492 #if defined(_WIN32)
493 if(buffer.size() != 0)
494 {
495 DWORD exeProtection;
496 VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
497 }
498 #endif
499 }
500
501 void write8(uint8_t Value) override
502 {
503 if(position == (uint64_t)buffer.size())
504 {
505 buffer.push_back(Value);
506 position++;
507 }
508 else if(position < (uint64_t)buffer.size())
509 {
510 buffer[position] = Value;
511 position++;
512 }
513 else ASSERT(false && "UNIMPLEMENTED");
514 }
515
516 void writeBytes(llvm::StringRef Bytes) override
517 {
518 std::size_t oldSize = buffer.size();
519 buffer.resize(oldSize + Bytes.size());
520 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
521 position += Bytes.size();
522 }
523
524 uint64_t tell() const override { return position; }
525
526 void seek(uint64_t Off) override { position = Off; }
527
528 const void *getEntry(int index) override
529 {
530 ASSERT(index == 0); // Subzero does not support multiple entry points per routine yet.
531 if(!entry)
532 {
533 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
534
535 size_t codeSize = 0;
536 entry = loadImage(&buffer[0], codeSize);
537
538 #if defined(_WIN32)
539 VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READ, &oldProtection);
540 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
541 #else
542 mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_EXEC);
543 __builtin___clear_cache((char*)entry, (char*)entry + codeSize);
544 #endif
545 }
546
547 return entry;
548 }
549
550 const void* addConstantData(const void* data, size_t size)
551 {
552 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[size]);
553 memcpy(buf.get(), data, size);
554 auto ptr = buf.get();
555 constantData.emplace_back(std::move(buf));
556 return ptr;
557 }
558
559 private:
560 void *entry;
561 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
562 std::size_t position;
563 std::vector<std::unique_ptr<uint8_t[]>> constantData;
564
565 #if defined(_WIN32)
566 DWORD oldProtection;
567 #endif
568 };
569
570 Nucleus::Nucleus()
571 {
572 ::codegenMutex.lock(); // Reactor is currently not thread safe
573
574 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
575 Ice::ClFlags::getParsedClFlags(Flags);
576
577 #if defined(__arm__)
578 Flags.setTargetArch(Ice::Target_ARM32);
579 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
580 #elif defined(__mips__)
581 Flags.setTargetArch(Ice::Target_MIPS32);
582 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
583 #else // x86
584 Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
585 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
586 #endif
587 Flags.setOutFileType(Ice::FT_Elf);
588 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
589 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
590 Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None);
591 Flags.setDisableHybridAssembly(true);
592
593 static llvm::raw_os_ostream cout(std::cout);
594 static llvm::raw_os_ostream cerr(std::cerr);
595
596 if(false) // Write out to a file
597 {
598 std::error_code errorCode;
599 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
600 ::elfFile = new Ice::ELFFileStreamer(*out);
601 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
602 }
603 else
604 {
605 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
606 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
607 ::routine = elfMemory;
608 }
609 }
610
611 Nucleus::~Nucleus()
612 {
613 delete ::routine;
614
615 delete ::allocator;
616 delete ::function;
617 delete ::context;
618
619 delete ::elfFile;
620 delete ::out;
621
622 ::codegenMutex.unlock();
623 }
624
625 void Nucleus::setDefaultConfig(const Config &cfg)
626 {
627 std::unique_lock<std::mutex> lock(::defaultConfigLock);
628 ::defaultConfig() = cfg;
629 }
630
631 void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
632 {
633 std::unique_lock<std::mutex> lock(::defaultConfigLock);
634 auto &config = ::defaultConfig();
635 config = cfgEdit.apply(config);
636 }
637
638 Config Nucleus::getDefaultConfig()
639 {
640 std::unique_lock<std::mutex> lock(::defaultConfigLock);
641 return ::defaultConfig();
642 }
643
644 std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
645 {
646 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
647 {
648 createRetVoid();
649 }
650
651 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, name));
652
653 rr::optimize(::function);
654
655 ::function->translate();
656 ASSERT(!::function->hasError());
657
658 auto globals = ::function->getGlobalInits();
659
660 if(globals && !globals->empty())
661 {
662 ::context->getGlobals()->merge(globals.get());
663 }
664
665 ::context->emitFileHeader();
666 ::function->emitIAS();
667 auto assembler = ::function->releaseAssembler();
668 auto objectWriter = ::context->getObjectWriter();
669 assembler->alignFunction();
670 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
671 ::context->lowerGlobals("last");
672 ::context->lowerConstants();
673 ::context->lowerJumpTables();
674 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
675 objectWriter->writeNonUserSections();
676
677 Routine *handoffRoutine = ::routine;
678 ::routine = nullptr;
679
680 return std::shared_ptr<Routine>(handoffRoutine);
681 }
682
683 Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
684 {
685 Ice::Type type = T(t);
686 int typeSize = Ice::typeWidthInBytes(type);
687 int totalSize = typeSize * (arraySize ? arraySize : 1);
688
689 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
690 auto address = ::function->makeVariable(T(getPointerType(t)));
691 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
692 ::function->getEntryNode()->getInsts().push_front(alloca);
693
694 return V(address);
695 }
696
697 BasicBlock *Nucleus::createBasicBlock()
698 {
699 return B(::function->makeNode());
700 }
701
702 BasicBlock *Nucleus::getInsertBlock()
703 {
704 return B(::basicBlock);
705 }
706
707 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
708 {
709 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
710
711 Variable::materializeAll();
712
713 ::basicBlock = basicBlock;
714 }
715
716 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
717 {
718 uint32_t sequenceNumber = 0;
719 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
720 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
721
722 for(Type *type : Params)
723 {
724 Ice::Variable *arg = ::function->makeVariable(T(type));
725 ::function->addArg(arg);
726 }
727
728 Ice::CfgNode *node = ::function->makeNode();
729 ::function->setEntryNode(node);
730 ::basicBlock = node;
731 }
732
733 Value *Nucleus::getArgument(unsigned int index)
734 {
735 return V(::function->getArgs()[index]);
736 }
737
738 void Nucleus::createRetVoid()
739 {
740 // Code generated after this point is unreachable, so any variables
741 // being read can safely return an undefined value. We have to avoid
742 // materializing variables after the terminator ret instruction.
743 Variable::killUnmaterialized();
744
745 Ice::InstRet *ret = Ice::InstRet::create(::function);
746 ::basicBlock->appendInst(ret);
747 }
748
749 void Nucleus::createRet(Value *v)
750 {
751 // Code generated after this point is unreachable, so any variables
752 // being read can safely return an undefined value. We have to avoid
753 // materializing variables after the terminator ret instruction.
754 Variable::killUnmaterialized();
755
756 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
757 ::basicBlock->appendInst(ret);
758 }
759
760 void Nucleus::createBr(BasicBlock *dest)
761 {
762 Variable::materializeAll();
763
764 auto br = Ice::InstBr::create(::function, dest);
765 ::basicBlock->appendInst(br);
766 }
767
768 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
769 {
770 Variable::materializeAll();
771
772 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
773 ::basicBlock->appendInst(br);
774 }
775
776 static bool isCommutative(Ice::InstArithmetic::OpKind op)
777 {
778 switch(op)
779 {
780 case Ice::InstArithmetic::Add:
781 case Ice::InstArithmetic::Fadd:
782 case Ice::InstArithmetic::Mul:
783 case Ice::InstArithmetic::Fmul:
784 case Ice::InstArithmetic::And:
785 case Ice::InstArithmetic::Or:
786 case Ice::InstArithmetic::Xor:
787 return true;
788 default:
789 return false;
790 }
791 }
792
793 static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
794 {
795 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
796
797 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
798
799 Ice::Variable *result = ::function->makeVariable(lhs->getType());
800 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
801 ::basicBlock->appendInst(arithmetic);
802
803 return V(result);
804 }
805
806 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
807 {
808 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
809 }
810
811 Value *Nucleus::createSub(Value *lhs, Value *rhs)
812 {
813 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
814 }
815
816 Value *Nucleus::createMul(Value *lhs, Value *rhs)
817 {
818 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
819 }
820
821 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
822 {
823 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
824 }
825
826 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
827 {
828 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
829 }
830
831 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
832 {
833 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
834 }
835
836 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
837 {
838 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
839 }
840
841 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
842 {
843 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
844 }
845
846 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
847 {
848 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
849 }
850
851 Value *Nucleus::createURem(Value *lhs, Value *rhs)
852 {
853 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
854 }
855
856 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
857 {
858 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
859 }
860
861 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
862 {
863 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
864 }
865
866 Value *Nucleus::createShl(Value *lhs, Value *rhs)
867 {
868 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
869 }
870
871 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
872 {
873 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
874 }
875
876 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
877 {
878 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
879 }
880
881 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
882 {
883 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
884 }
885
886 Value *Nucleus::createOr(Value *lhs, Value *rhs)
887 {
888 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
889 }
890
891 Value *Nucleus::createXor(Value *lhs, Value *rhs)
892 {
893 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
894 }
895
896 Value *Nucleus::createNeg(Value *v)
897 {
898 return createSub(createNullValue(T(v->getType())), v);
899 }
900
901 Value *Nucleus::createFNeg(Value *v)
902 {
903 double c[4] = {-0.0, -0.0, -0.0, -0.0};
904 Value *negativeZero = Ice::isVectorType(v->getType()) ?
905 createConstantVector(c, T(v->getType())) :
906 V(::context->getConstantFloat(-0.0f));
907
908 return createFSub(negativeZero, v);
909 }
910
911 Value *Nucleus::createNot(Value *v)
912 {
913 if(Ice::isScalarIntegerType(v->getType()))
914 {
915 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
916 }
917 else // Vector
918 {
919 int64_t c[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
920 return createXor(v, createConstantVector(c, T(v->getType())));
921 }
922 }
923
924 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
925 {
926 ASSERT(!atomic); // Unimplemented
927 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
928
929 int valueType = (int)reinterpret_cast<intptr_t>(type);
930 Ice::Variable *result = ::function->makeVariable(T(type));
931
932 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
933 {
934 if(emulateIntrinsics)
935 {
936 if(typeSize(type) == 4)
937 {
938 auto pointer = RValue<Pointer<Byte>>(ptr);
939 Int x = *Pointer<Int>(pointer);
940
941 Int4 vector;
942 vector = Insert(vector, x, 0);
943
944 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
945 ::basicBlock->appendInst(bitcast);
946 }
947 else if(typeSize(type) == 8)
948 {
949 auto pointer = RValue<Pointer<Byte>>(ptr);
950 Int x = *Pointer<Int>(pointer);
951 Int y = *Pointer<Int>(pointer + 4);
952
953 Int4 vector;
954 vector = Insert(vector, x, 0);
955 vector = Insert(vector, y, 1);
956
957 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
958 ::basicBlock->appendInst(bitcast);
959 }
960 else UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
961 }
962 else
963 {
964 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
965 auto target = ::context->getConstantUndef(Ice::IceType_i32);
966 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
967 load->addArg(ptr);
968 load->addArg(::context->getConstantInt32(typeSize(type)));
969 ::basicBlock->appendInst(load);
970 }
971 }
972 else
973 {
974 auto load = Ice::InstLoad::create(::function, result, ptr, align);
975 ::basicBlock->appendInst(load);
976 }
977
978 return V(result);
979 }
980
981 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
982 {
983 ASSERT(!atomic); // Unimplemented
984 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
985
986 #if __has_feature(memory_sanitizer)
987 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
988 if(align != 0)
989 {
990 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
991 call->addArg(ptr);
992 call->addArg(::context->getConstantInt64(typeSize(type)));
993 ::basicBlock->appendInst(call);
994 }
995 #endif
996
997 int valueType = (int)reinterpret_cast<intptr_t>(type);
998
999 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
1000 {
1001 if(emulateIntrinsics)
1002 {
1003 if(typeSize(type) == 4)
1004 {
1005 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1006 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1007 ::basicBlock->appendInst(bitcast);
1008
1009 RValue<Int4> v(V(vector));
1010
1011 auto pointer = RValue<Pointer<Byte>>(ptr);
1012 Int x = Extract(v, 0);
1013 *Pointer<Int>(pointer) = x;
1014 }
1015 else if(typeSize(type) == 8)
1016 {
1017 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1018 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1019 ::basicBlock->appendInst(bitcast);
1020
1021 RValue<Int4> v(V(vector));
1022
1023 auto pointer = RValue<Pointer<Byte>>(ptr);
1024 Int x = Extract(v, 0);
1025 *Pointer<Int>(pointer) = x;
1026 Int y = Extract(v, 1);
1027 *Pointer<Int>(pointer + 4) = y;
1028 }
1029 else UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
1030 }
1031 else
1032 {
1033 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
1034 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1035 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1036 store->addArg(value);
1037 store->addArg(ptr);
1038 store->addArg(::context->getConstantInt32(typeSize(type)));
1039 ::basicBlock->appendInst(store);
1040 }
1041 }
1042 else
1043 {
1044 ASSERT(value->getType() == T(type));
1045
1046 auto store = Ice::InstStore::create(::function, value, ptr, align);
1047 ::basicBlock->appendInst(store);
1048 }
1049
1050 return value;
1051 }
1052
1053 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1054 {
1055 ASSERT(index->getType() == Ice::IceType_i32);
1056
1057 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1058 {
1059 int32_t offset = constant->getValue() * (int)typeSize(type);
1060
1061 if(offset == 0)
1062 {
1063 return ptr;
1064 }
1065
1066 return createAdd(ptr, createConstantInt(offset));
1067 }
1068
1069 if(!Ice::isByteSizedType(T(type)))
1070 {
1071 index = createMul(index, createConstantInt((int)typeSize(type)));
1072 }
1073
1074 if(sizeof(void*) == 8)
1075 {
1076 if(unsignedIndex)
1077 {
1078 index = createZExt(index, T(Ice::IceType_i64));
1079 }
1080 else
1081 {
1082 index = createSExt(index, T(Ice::IceType_i64));
1083 }
1084 }
1085
1086 return createAdd(ptr, index);
1087 }
1088
1089 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1090 {
1091 UNIMPLEMENTED("createAtomicAdd");
1092 return nullptr;
1093 }
1094
1095 Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1096 {
1097 UNIMPLEMENTED("createAtomicSub");
1098 return nullptr;
1099 }
1100
1101 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1102 {
1103 UNIMPLEMENTED("createAtomicAnd");
1104 return nullptr;
1105 }
1106
1107 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1108 {
1109 UNIMPLEMENTED("createAtomicOr");
1110 return nullptr;
1111 }
1112
1113 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1114 {
1115 UNIMPLEMENTED("createAtomicXor");
1116 return nullptr;
1117 }
1118
1119 Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1120 {
1121 UNIMPLEMENTED("createAtomicMin");
1122 return nullptr;
1123 }
1124
1125 Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1126 {
1127 UNIMPLEMENTED("createAtomicMax");
1128 return nullptr;
1129 }
1130
1131 Value *Nucleus::createAtomicUMin(Value *ptr, Value *value, std::memory_order memoryOrder)
1132 {
1133 UNIMPLEMENTED("createAtomicUMin");
1134 return nullptr;
1135 }
1136
1137 Value *Nucleus::createAtomicUMax(Value *ptr, Value *value, std::memory_order memoryOrder)
1138 {
1139 UNIMPLEMENTED("createAtomicUMax");
1140 return nullptr;
1141 }
1142
1143 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1144 {
1145 UNIMPLEMENTED("createAtomicExchange");
1146 return nullptr;
1147 }
1148
1149 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1150 {
1151 UNIMPLEMENTED("createAtomicCompareExchange");
1152 return nullptr;
1153 }
1154
1155 static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1156 {
1157 if(v->getType() == T(destType))
1158 {
1159 return v;
1160 }
1161
1162 Ice::Variable *result = ::function->makeVariable(T(destType));
1163 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1164 ::basicBlock->appendInst(cast);
1165
1166 return V(result);
1167 }
1168
1169 Value *Nucleus::createTrunc(Value *v, Type *destType)
1170 {
1171 return createCast(Ice::InstCast::Trunc, v, destType);
1172 }
1173
1174 Value *Nucleus::createZExt(Value *v, Type *destType)
1175 {
1176 return createCast(Ice::InstCast::Zext, v, destType);
1177 }
1178
1179 Value *Nucleus::createSExt(Value *v, Type *destType)
1180 {
1181 return createCast(Ice::InstCast::Sext, v, destType);
1182 }
1183
1184 Value *Nucleus::createFPToSI(Value *v, Type *destType)
1185 {
1186 return createCast(Ice::InstCast::Fptosi, v, destType);
1187 }
1188
1189 Value *Nucleus::createSIToFP(Value *v, Type *destType)
1190 {
1191 return createCast(Ice::InstCast::Sitofp, v, destType);
1192 }
1193
1194 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1195 {
1196 return createCast(Ice::InstCast::Fptrunc, v, destType);
1197 }
1198
1199 Value *Nucleus::createFPExt(Value *v, Type *destType)
1200 {
1201 return createCast(Ice::InstCast::Fpext, v, destType);
1202 }
1203
1204 Value *Nucleus::createBitCast(Value *v, Type *destType)
1205 {
1206 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1207 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1208 // emulate them by writing to the stack and reading back as the destination type.
1209 if(emulateMismatchedBitCast)
1210 {
1211 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1212 {
1213 Value *address = allocateStackVariable(destType);
1214 createStore(v, address, T(v->getType()));
1215 return createLoad(address, destType);
1216 }
1217 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1218 {
1219 Value *address = allocateStackVariable(T(v->getType()));
1220 createStore(v, address, T(v->getType()));
1221 return createLoad(address, destType);
1222 }
1223 }
1224
1225 return createCast(Ice::InstCast::Bitcast, v, destType);
1226 }
1227
1228 static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1229 {
1230 ASSERT(lhs->getType() == rhs->getType());
1231
1232 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1233 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1234 ::basicBlock->appendInst(cmp);
1235
1236 return V(result);
1237 }
1238
1239 Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1240 {
1241 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1242 }
1243
1244 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1245 {
1246 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1247 }
1248
1249 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1250 {
1251 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1252 }
1253
1254 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1255 {
1256 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1257 }
1258
1259 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1260 {
1261 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1262 }
1263
1264 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1265 {
1266 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1267 }
1268
1269 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1270 {
1271 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1272 }
1273
1274 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1275 {
1276 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1277 }
1278
1279 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1280 {
1281 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1282 }
1283
1284 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1285 {
1286 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1287 }
1288
1289 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1290 {
1291 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1292 }
1293
1294 static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1295 {
1296 ASSERT(lhs->getType() == rhs->getType());
1297 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1298
1299 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1300 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1301 ::basicBlock->appendInst(cmp);
1302
1303 return V(result);
1304 }
1305
1306 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1307 {
1308 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1309 }
1310
1311 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1312 {
1313 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1314 }
1315
1316 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1317 {
1318 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1319 }
1320
1321 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1322 {
1323 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1324 }
1325
1326 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1327 {
1328 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1329 }
1330
1331 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1332 {
1333 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1334 }
1335
1336 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1337 {
1338 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1339 }
1340
1341 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1342 {
1343 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1344 }
1345
1346 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1347 {
1348 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1349 }
1350
1351 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1352 {
1353 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1354 }
1355
1356 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1357 {
1358 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1359 }
1360
1361 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1362 {
1363 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1364 }
1365
1366 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1367 {
1368 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1369 }
1370
1371 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1372 {
1373 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1374 }
1375
1376 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1377 {
1378 auto result = ::function->makeVariable(T(type));
1379 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1380 ::basicBlock->appendInst(extract);
1381
1382 return V(result);
1383 }
1384
1385 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1386 {
1387 auto result = ::function->makeVariable(vector->getType());
1388 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1389 ::basicBlock->appendInst(insert);
1390
1391 return V(result);
1392 }
1393
1394 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1395 {
1396 ASSERT(V1->getType() == V2->getType());
1397
1398 int size = Ice::typeNumElements(V1->getType());
1399 auto result = ::function->makeVariable(V1->getType());
1400 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1401
1402 for(int i = 0; i < size; i++)
1403 {
1404 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1405 }
1406
1407 ::basicBlock->appendInst(shuffle);
1408
1409 return V(result);
1410 }
1411
1412 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1413 {
1414 ASSERT(ifTrue->getType() == ifFalse->getType());
1415
1416 auto result = ::function->makeVariable(ifTrue->getType());
1417 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1418 ::basicBlock->appendInst(select);
1419
1420 return V(result);
1421 }
1422
1423 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1424 {
1425 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1426 ::basicBlock->appendInst(switchInst);
1427
1428 return reinterpret_cast<SwitchCases*>(switchInst);
1429 }
1430
1431 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1432 {
1433 switchCases->addBranch(label, label, branch);
1434 }
1435
1436 void Nucleus::createUnreachable()
1437 {
1438 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1439 ::basicBlock->appendInst(unreachable);
1440 }
1441
1442 Type *Nucleus::getPointerType(Type *ElementType)
1443 {
1444 if(sizeof(void*) == 8)
1445 {
1446 return T(Ice::IceType_i64);
1447 }
1448 else
1449 {
1450 return T(Ice::IceType_i32);
1451 }
1452 }
1453
1454 Value *Nucleus::createNullValue(Type *Ty)
1455 {
1456 if(Ice::isVectorType(T(Ty)))
1457 {
1458 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
1459 int64_t c[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1460 return createConstantVector(c, Ty);
1461 }
1462 else
1463 {
1464 return V(::context->getConstantZero(T(Ty)));
1465 }
1466 }
1467
1468 Value *Nucleus::createConstantLong(int64_t i)
1469 {
1470 return V(::context->getConstantInt64(i));
1471 }
1472
1473 Value *Nucleus::createConstantInt(int i)
1474 {
1475 return V(::context->getConstantInt32(i));
1476 }
1477
1478 Value *Nucleus::createConstantInt(unsigned int i)
1479 {
1480 return V(::context->getConstantInt32(i));
1481 }
1482
1483 Value *Nucleus::createConstantBool(bool b)
1484 {
1485 return V(::context->getConstantInt1(b));
1486 }
1487
1488 Value *Nucleus::createConstantByte(signed char i)
1489 {
1490 return V(::context->getConstantInt8(i));
1491 }
1492
1493 Value *Nucleus::createConstantByte(unsigned char i)
1494 {
1495 return V(::context->getConstantInt8(i));
1496 }
1497
1498 Value *Nucleus::createConstantShort(short i)
1499 {
1500 return V(::context->getConstantInt16(i));
1501 }
1502
1503 Value *Nucleus::createConstantShort(unsigned short i)
1504 {
1505 return V(::context->getConstantInt16(i));
1506 }
1507
1508 Value *Nucleus::createConstantFloat(float x)
1509 {
1510 return V(::context->getConstantFloat(x));
1511 }
1512
1513 Value *Nucleus::createNullPointer(Type *Ty)
1514 {
1515 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1516 }
1517
1518 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1519 {
1520 const int vectorSize = 16;
1521 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
1522 const int alignment = vectorSize;
1523 auto globalPool = ::function->getGlobalPool();
1524
1525 const int64_t *i = constants;
1526 const double *f = reinterpret_cast<const double*>(constants);
1527 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1528
1529 switch((int)reinterpret_cast<intptr_t>(type))
1530 {
1531 case Ice::IceType_v4i32:
1532 case Ice::IceType_v4i1:
1533 {
1534 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1535 static_assert(sizeof(initializer) == vectorSize, "!");
1536 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1537 }
1538 break;
1539 case Ice::IceType_v4f32:
1540 {
1541 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
1542 static_assert(sizeof(initializer) == vectorSize, "!");
1543 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1544 }
1545 break;
1546 case Ice::IceType_v8i16:
1547 case Ice::IceType_v8i1:
1548 {
1549 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
1550 static_assert(sizeof(initializer) == vectorSize, "!");
1551 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1552 }
1553 break;
1554 case Ice::IceType_v16i8:
1555 case Ice::IceType_v16i1:
1556 {
1557 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
1558 static_assert(sizeof(initializer) == vectorSize, "!");
1559 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1560 }
1561 break;
1562 case Type_v2i32:
1563 {
1564 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1565 static_assert(sizeof(initializer) == vectorSize, "!");
1566 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1567 }
1568 break;
1569 case Type_v2f32:
1570 {
1571 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
1572 static_assert(sizeof(initializer) == vectorSize, "!");
1573 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1574 }
1575 break;
1576 case Type_v4i16:
1577 {
1578 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
1579 static_assert(sizeof(initializer) == vectorSize, "!");
1580 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1581 }
1582 break;
1583 case Type_v8i8:
1584 {
1585 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
1586 static_assert(sizeof(initializer) == vectorSize, "!");
1587 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1588 }
1589 break;
1590 case Type_v4i8:
1591 {
1592 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
1593 static_assert(sizeof(initializer) == vectorSize, "!");
1594 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1595 }
1596 break;
1597 default:
1598 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
1599 }
1600
1601 auto name = Ice::GlobalString::createWithoutString(::context);
1602 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1603 variableDeclaration->setName(name);
1604 variableDeclaration->setAlignment(alignment);
1605 variableDeclaration->setIsConstant(true);
1606 variableDeclaration->addInitializer(dataInitializer);
1607
1608 ::function->addGlobal(variableDeclaration);
1609
1610 constexpr int32_t offset = 0;
1611 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1612
1613 Ice::Variable *result = ::function->makeVariable(T(type));
1614 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1615 ::basicBlock->appendInst(load);
1616
1617 return V(result);
1618 }
1619
1620 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1621 {
1622 return createConstantVector((const int64_t*)constants, type);
1623 }
1624
1625 Type *Void::getType()
1626 {
1627 return T(Ice::IceType_void);
1628 }
1629
1630 Type *Bool::getType()
1631 {
1632 return T(Ice::IceType_i1);
1633 }
1634
1635 Type *Byte::getType()
1636 {
1637 return T(Ice::IceType_i8);
1638 }
1639
1640 Type *SByte::getType()
1641 {
1642 return T(Ice::IceType_i8);
1643 }
1644
1645 Type *Short::getType()
1646 {
1647 return T(Ice::IceType_i16);
1648 }
1649
1650 Type *UShort::getType()
1651 {
1652 return T(Ice::IceType_i16);
1653 }
1654
1655 Type *Byte4::getType()
1656 {
1657 return T(Type_v4i8);
1658 }
1659
1660 Type *SByte4::getType()
1661 {
1662 return T(Type_v4i8);
1663 }
1664
1665 namespace
1666 {
1667 RValue<Byte> SaturateUnsigned(RValue<Short> x)
1668 {
1669 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
1670 }
1671
1672 RValue<Byte> Extract(RValue<Byte8> val, int i)
1673 {
1674 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
1675 }
1676
1677 RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
1678 {
1679 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
1680 }
1681 }
1682
1683 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
1684 {
1685 if(emulateIntrinsics)
1686 {
1687 Byte8 result;
1688 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1689 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1690 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1691 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1692 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1693 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1694 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1695 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
1696
1697 return result;
1698 }
1699 else
1700 {
1701 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1702 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1703 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1704 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1705 paddusb->addArg(x.value);
1706 paddusb->addArg(y.value);
1707 ::basicBlock->appendInst(paddusb);
1708
1709 return RValue<Byte8>(V(result));
1710 }
1711 }
1712
1713 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
1714 {
1715 if(emulateIntrinsics)
1716 {
1717 Byte8 result;
1718 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1719 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1720 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1721 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1722 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1723 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1724 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1725 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
1726
1727 return result;
1728 }
1729 else
1730 {
1731 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1732 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1733 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1734 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1735 psubusw->addArg(x.value);
1736 psubusw->addArg(y.value);
1737 ::basicBlock->appendInst(psubusw);
1738
1739 return RValue<Byte8>(V(result));
1740 }
1741 }
1742
1743 RValue<SByte> Extract(RValue<SByte8> val, int i)
1744 {
1745 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
1746 }
1747
1748 RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
1749 {
1750 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
1751 }
1752
1753 RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1754 {
1755 if(emulateIntrinsics)
1756 {
1757 SByte8 result;
1758 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
1759 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
1760 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
1761 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
1762 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
1763 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
1764 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
1765 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
1766
1767 return result;
1768 }
1769 else
1770 {
1771 #if defined(__i386__) || defined(__x86_64__)
1772 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
1773 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
1774 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
1775
1776 return As<SByte8>(hi | lo);
1777 #else
1778 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
1779 #endif
1780 }
1781 }
1782
1783 RValue<Int> SignMask(RValue<Byte8> x)
1784 {
1785 if(emulateIntrinsics || CPUID::ARM)
1786 {
1787 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
1788 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
1789 }
1790 else
1791 {
1792 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
1793 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1794 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1795 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
1796 movmsk->addArg(x.value);
1797 ::basicBlock->appendInst(movmsk);
1798
1799 return RValue<Int>(V(result)) & 0xFF;
1800 }
1801 }
1802
1803// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
1804// {
1805// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
1806// }
1807
1808 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
1809 {
1810 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
1811 }
1812
1813 Type *Byte8::getType()
1814 {
1815 return T(Type_v8i8);
1816 }
1817
1818// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
1819// {
1820// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
1821// }
1822
1823// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1824// {
1825// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
1826// }
1827
1828 RValue<SByte> SaturateSigned(RValue<Short> x)
1829 {
1830 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
1831 }
1832
1833 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
1834 {
1835 if(emulateIntrinsics)
1836 {
1837 SByte8 result;
1838 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1839 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1840 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1841 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1842 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1843 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1844 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1845 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
1846
1847 return result;
1848 }
1849 else
1850 {
1851 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1852 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1853 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1854 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1855 paddsb->addArg(x.value);
1856 paddsb->addArg(y.value);
1857 ::basicBlock->appendInst(paddsb);
1858
1859 return RValue<SByte8>(V(result));
1860 }
1861 }
1862
1863 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
1864 {
1865 if(emulateIntrinsics)
1866 {
1867 SByte8 result;
1868 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1869 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1870 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1871 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1872 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1873 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1874 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1875 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
1876
1877 return result;
1878 }
1879 else
1880 {
1881 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1882 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1883 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1884 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1885 psubsb->addArg(x.value);
1886 psubsb->addArg(y.value);
1887 ::basicBlock->appendInst(psubsb);
1888
1889 return RValue<SByte8>(V(result));
1890 }
1891 }
1892
1893 RValue<Int> SignMask(RValue<SByte8> x)
1894 {
1895 if(emulateIntrinsics || CPUID::ARM)
1896 {
1897 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
1898 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
1899 }
1900 else
1901 {
1902 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
1903 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1904 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1905 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
1906 movmsk->addArg(x.value);
1907 ::basicBlock->appendInst(movmsk);
1908
1909 return RValue<Int>(V(result)) & 0xFF;
1910 }
1911 }
1912
1913 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
1914 {
1915 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
1916 }
1917
1918 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
1919 {
1920 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
1921 }
1922
1923 Type *SByte8::getType()
1924 {
1925 return T(Type_v8i8);
1926 }
1927
1928 Type *Byte16::getType()
1929 {
1930 return T(Ice::IceType_v16i8);
1931 }
1932
1933 Type *SByte16::getType()
1934 {
1935 return T(Ice::IceType_v16i8);
1936 }
1937
1938 Type *Short2::getType()
1939 {
1940 return T(Type_v2i16);
1941 }
1942
1943 Type *UShort2::getType()
1944 {
1945 return T(Type_v2i16);
1946 }
1947
1948 Short4::Short4(RValue<Int4> cast)
1949 {
1950 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
1951 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
1952 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
1953
1954 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
1955 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
1956
1957 storeValue(short4);
1958 }
1959
1960// Short4::Short4(RValue<Float> cast)
1961// {
1962// }
1963
1964 Short4::Short4(RValue<Float4> cast)
1965 {
1966 UNIMPLEMENTED("Short4::Short4(RValue<Float4> cast)");
1967 }
1968
1969 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
1970 {
1971 if(emulateIntrinsics)
1972 {
1973 Short4 result;
1974 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
1975 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
1976 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
1977 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
1978
1979 return result;
1980 }
1981 else
1982 {
1983 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
1984 }
1985 }
1986
1987 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
1988 {
1989 if(emulateIntrinsics)
1990 {
1991 Short4 result;
1992 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
1993 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
1994 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
1995 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
1996
1997 return result;
1998 }
1999 else
2000 {
2001 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2002 }
2003 }
2004
2005 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2006 {
2007 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2008 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2009 ::basicBlock->appendInst(cmp);
2010
2011 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2012 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2013 ::basicBlock->appendInst(select);
2014
2015 return RValue<Short4>(V(result));
2016 }
2017
2018 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2019 {
2020 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2021 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2022 ::basicBlock->appendInst(cmp);
2023
2024 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2025 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2026 ::basicBlock->appendInst(select);
2027
2028 return RValue<Short4>(V(result));
2029 }
2030
2031 RValue<Short> SaturateSigned(RValue<Int> x)
2032 {
2033 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2034 }
2035
2036 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2037 {
2038 if(emulateIntrinsics)
2039 {
2040 Short4 result;
2041 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2042 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2043 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2044 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2045
2046 return result;
2047 }
2048 else
2049 {
2050 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2051 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2052 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2053 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2054 paddsw->addArg(x.value);
2055 paddsw->addArg(y.value);
2056 ::basicBlock->appendInst(paddsw);
2057
2058 return RValue<Short4>(V(result));
2059 }
2060 }
2061
2062 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2063 {
2064 if(emulateIntrinsics)
2065 {
2066 Short4 result;
2067 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2068 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2069 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2070 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2071
2072 return result;
2073 }
2074 else
2075 {
2076 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2077 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2078 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2079 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2080 psubsw->addArg(x.value);
2081 psubsw->addArg(y.value);
2082 ::basicBlock->appendInst(psubsw);
2083
2084 return RValue<Short4>(V(result));
2085 }
2086 }
2087
2088 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2089 {
2090 if(emulateIntrinsics)
2091 {
2092 Short4 result;
2093 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2094 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2095 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2096 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2097
2098 return result;
2099 }
2100 else
2101 {
2102 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2103 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2104 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2105 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2106 pmulhw->addArg(x.value);
2107 pmulhw->addArg(y.value);
2108 ::basicBlock->appendInst(pmulhw);
2109
2110 return RValue<Short4>(V(result));
2111 }
2112 }
2113
2114 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2115 {
2116 if(emulateIntrinsics)
2117 {
2118 Int2 result;
2119 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2120 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2121
2122 return result;
2123 }
2124 else
2125 {
2126 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2127 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2128 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2129 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2130 pmaddwd->addArg(x.value);
2131 pmaddwd->addArg(y.value);
2132 ::basicBlock->appendInst(pmaddwd);
2133
2134 return As<Int2>(V(result));
2135 }
2136 }
2137
2138 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2139 {
2140 if(emulateIntrinsics)
2141 {
2142 SByte8 result;
2143 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2144 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2145 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2146 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2147 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2148 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2149 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2150 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2151
2152 return result;
2153 }
2154 else
2155 {
2156 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2157 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2158 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2159 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2160 pack->addArg(x.value);
2161 pack->addArg(y.value);
2162 ::basicBlock->appendInst(pack);
2163
2164 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
2165 }
2166 }
2167
2168 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2169 {
2170 if(emulateIntrinsics)
2171 {
2172 Byte8 result;
2173 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2174 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2175 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2176 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2177 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2178 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2179 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2180 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2181
2182 return result;
2183 }
2184 else
2185 {
2186 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2187 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2188 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2189 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2190 pack->addArg(x.value);
2191 pack->addArg(y.value);
2192 ::basicBlock->appendInst(pack);
2193
2194 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
2195 }
2196 }
2197
2198 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2199 {
2200 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2201 }
2202
2203 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2204 {
2205 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2206 }
2207
2208 Type *Short4::getType()
2209 {
2210 return T(Type_v4i16);
2211 }
2212
2213 UShort4::UShort4(RValue<Float4> cast, bool saturate)
2214 {
2215 if(saturate)
2216 {
2217 if(CPUID::SSE4_1)
2218 {
2219 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2220 // PackUnsigned takes care of 0x0000 saturation.
2221 Int4 int4(Min(cast, Float4(0xFFFF)));
2222 *this = As<UShort4>(PackUnsigned(int4, int4));
2223 }
2224 else if(CPUID::ARM)
2225 {
2226 // ARM saturates the 32-bit integer result on overflow/undeflow.
2227 Int4 int4(cast);
2228 *this = As<UShort4>(PackUnsigned(int4, int4));
2229 }
2230 else
2231 {
2232 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
2233 }
2234 }
2235 else
2236 {
2237 *this = Short4(Int4(cast));
2238 }
2239 }
2240
2241 RValue<UShort> Extract(RValue<UShort4> val, int i)
2242 {
2243 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2244 }
2245
2246 RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2247 {
2248 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2249 }
2250
2251 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2252 {
2253 if(emulateIntrinsics)
2254 {
2255 UShort4 result;
2256 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2257 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2258 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2259 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2260
2261 return result;
2262 }
2263 else
2264 {
2265 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2266 }
2267 }
2268
2269 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2270 {
2271 if(emulateIntrinsics)
2272 {
2273 UShort4 result;
2274 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2275 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2276 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2277 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2278
2279 return result;
2280 }
2281 else
2282 {
2283 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2284 }
2285 }
2286
2287 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2288 {
2289 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2290 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2291 ::basicBlock->appendInst(cmp);
2292
2293 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2294 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2295 ::basicBlock->appendInst(select);
2296
2297 return RValue<UShort4>(V(result));
2298 }
2299
2300 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2301 {
2302 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2303 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2304 ::basicBlock->appendInst(cmp);
2305
2306 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2307 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2308 ::basicBlock->appendInst(select);
2309
2310 return RValue<UShort4>(V(result));
2311 }
2312
2313 RValue<UShort> SaturateUnsigned(RValue<Int> x)
2314 {
2315 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2316 }
2317
2318 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2319 {
2320 if(emulateIntrinsics)
2321 {
2322 UShort4 result;
2323 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2324 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2325 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2326 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2327
2328 return result;
2329 }
2330 else
2331 {
2332 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2333 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2334 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2335 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2336 paddusw->addArg(x.value);
2337 paddusw->addArg(y.value);
2338 ::basicBlock->appendInst(paddusw);
2339
2340 return RValue<UShort4>(V(result));
2341 }
2342 }
2343
2344 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2345 {
2346 if(emulateIntrinsics)
2347 {
2348 UShort4 result;
2349 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2350 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2351 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2352 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2353
2354 return result;
2355 }
2356 else
2357 {
2358 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2359 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2360 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2361 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2362 psubusw->addArg(x.value);
2363 psubusw->addArg(y.value);
2364 ::basicBlock->appendInst(psubusw);
2365
2366 return RValue<UShort4>(V(result));
2367 }
2368 }
2369
2370 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2371 {
2372 if(emulateIntrinsics)
2373 {
2374 UShort4 result;
2375 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2376 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2377 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2378 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2379
2380 return result;
2381 }
2382 else
2383 {
2384 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2385 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2386 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2387 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2388 pmulhuw->addArg(x.value);
2389 pmulhuw->addArg(y.value);
2390 ::basicBlock->appendInst(pmulhuw);
2391
2392 return RValue<UShort4>(V(result));
2393 }
2394 }
2395
2396 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2397 {
2398 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2399
2400 // Scalarized implementation.
2401 Int4 result;
2402 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2403 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2404 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2405 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2406
2407 return result;
2408 }
2409
2410 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2411 {
2412 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2413
2414 if(false) // Partial product based implementation.
2415 {
2416 auto xh = x >> 16;
2417 auto yh = y >> 16;
2418 auto xl = x & UInt4(0x0000FFFF);
2419 auto yl = y & UInt4(0x0000FFFF);
2420 auto xlyh = xl * yh;
2421 auto xhyl = xh * yl;
2422 auto xlyhh = xlyh >> 16;
2423 auto xhylh = xhyl >> 16;
2424 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2425 auto xhyll = xhyl & UInt4(0x0000FFFF);
2426 auto xlylh = (xl * yl) >> 16;
2427 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2428
2429 return (xh * yh) + (xlyhh + xhylh) + oflow;
2430 }
2431
2432 // Scalarized implementation.
2433 Int4 result;
2434 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
2435 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
2436 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
2437 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
2438
2439 return As<UInt4>(result);
2440 }
2441
2442 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2443 {
2444 UNIMPLEMENTED("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
2445 return UShort4(0);
2446 }
2447
2448 Type *UShort4::getType()
2449 {
2450 return T(Type_v4i16);
2451 }
2452
2453 RValue<Short> Extract(RValue<Short8> val, int i)
2454 {
2455 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2456 }
2457
2458 RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2459 {
2460 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2461 }
2462
2463 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2464 {
2465 if(emulateIntrinsics)
2466 {
2467 Short8 result;
2468 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2469 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2470 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2471 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
2472 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
2473 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
2474 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
2475 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
2476
2477 return result;
2478 }
2479 else
2480 {
2481 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2482 }
2483 }
2484
2485 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
2486 {
2487 if(emulateIntrinsics)
2488 {
2489 Short8 result;
2490 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2491 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2492 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2493 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2494 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
2495 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
2496 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
2497 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
2498
2499 return result;
2500 }
2501 else
2502 {
2503 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2504 }
2505 }
2506
2507 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
2508 {
2509 UNIMPLEMENTED("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
2510 return Int4(0);
2511 }
2512
2513 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
2514 {
2515 UNIMPLEMENTED("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
2516 return Short8(0);
2517 }
2518
2519 Type *Short8::getType()
2520 {
2521 return T(Ice::IceType_v8i16);
2522 }
2523
2524 RValue<UShort> Extract(RValue<UShort8> val, int i)
2525 {
2526 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2527 }
2528
2529 RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
2530 {
2531 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
2532 }
2533
2534 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
2535 {
2536 if(emulateIntrinsics)
2537 {
2538 UShort8 result;
2539 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2540 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2541 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2542 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2543 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
2544 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
2545 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
2546 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
2547
2548 return result;
2549 }
2550 else
2551 {
2552 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2553 }
2554 }
2555
2556 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
2557 {
2558 if(emulateIntrinsics)
2559 {
2560 UShort8 result;
2561 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2562 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2563 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2564 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2565 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
2566 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
2567 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
2568 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
2569
2570 return result;
2571 }
2572 else
2573 {
2574 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2575 }
2576 }
2577
2578 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
2579 {
2580 UNIMPLEMENTED("RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)");
2581 return UShort8(0);
2582 }
2583
2584 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
2585 {
2586 UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
2587 return UShort8(0);
2588 }
2589
2590 // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
2591// RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
2592// {
2593// ASSERT(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
2594// }
2595
2596 Type *UShort8::getType()
2597 {
2598 return T(Ice::IceType_v8i16);
2599 }
2600
2601 RValue<Int> operator++(Int &val, int) // Post-increment
2602 {
2603 RValue<Int> res = val;
2604 val += 1;
2605 return res;
2606 }
2607
2608 const Int &operator++(Int &val) // Pre-increment
2609 {
2610 val += 1;
2611 return val;
2612 }
2613
2614 RValue<Int> operator--(Int &val, int) // Post-decrement
2615 {
2616 RValue<Int> res = val;
2617 val -= 1;
2618 return res;
2619 }
2620
2621 const Int &operator--(Int &val) // Pre-decrement
2622 {
2623 val -= 1;
2624 return val;
2625 }
2626
2627 RValue<Int> RoundInt(RValue<Float> cast)
2628 {
2629 if(emulateIntrinsics || CPUID::ARM)
2630 {
2631 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2632 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
2633 }
2634 else
2635 {
2636 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2637 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2638 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2639 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2640 nearbyint->addArg(cast.value);
2641 ::basicBlock->appendInst(nearbyint);
2642
2643 return RValue<Int>(V(result));
2644 }
2645 }
2646
2647 Type *Int::getType()
2648 {
2649 return T(Ice::IceType_i32);
2650 }
2651
2652 Type *Long::getType()
2653 {
2654 return T(Ice::IceType_i64);
2655 }
2656
2657 UInt::UInt(RValue<Float> cast)
2658 {
2659 // Smallest positive value representable in UInt, but not in Int
2660 const unsigned int ustart = 0x80000000u;
2661 const float ustartf = float(ustart);
2662
2663 // If the value is negative, store 0, otherwise store the result of the conversion
2664 storeValue((~(As<Int>(cast) >> 31) &
2665 // Check if the value can be represented as an Int
2666 IfThenElse(cast >= ustartf,
2667 // If the value is too large, subtract ustart and re-add it after conversion.
2668 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2669 // Otherwise, just convert normally
2670 Int(cast))).value);
2671 }
2672
2673 RValue<UInt> operator++(UInt &val, int) // Post-increment
2674 {
2675 RValue<UInt> res = val;
2676 val += 1;
2677 return res;
2678 }
2679
2680 const UInt &operator++(UInt &val) // Pre-increment
2681 {
2682 val += 1;
2683 return val;
2684 }
2685
2686 RValue<UInt> operator--(UInt &val, int) // Post-decrement
2687 {
2688 RValue<UInt> res = val;
2689 val -= 1;
2690 return res;
2691 }
2692
2693 const UInt &operator--(UInt &val) // Pre-decrement
2694 {
2695 val -= 1;
2696 return val;
2697 }
2698
2699// RValue<UInt> RoundUInt(RValue<Float> cast)
2700// {
2701// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
2702// }
2703
2704 Type *UInt::getType()
2705 {
2706 return T(Ice::IceType_i32);
2707 }
2708
2709// Int2::Int2(RValue<Int> cast)
2710// {
2711// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2712// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
2713//
2714// Constant *shuffle[2];
2715// shuffle[0] = Nucleus::createConstantInt(0);
2716// shuffle[1] = Nucleus::createConstantInt(0);
2717//
2718// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
2719//
2720// storeValue(replicate);
2721// }
2722
2723 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
2724 {
2725 if(emulateIntrinsics)
2726 {
2727 Int2 result;
2728 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2729 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
2730
2731 return result;
2732 }
2733 else
2734 {
2735 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2736 }
2737 }
2738
2739 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
2740 {
2741 if(emulateIntrinsics)
2742 {
2743 Int2 result;
2744 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2745 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2746
2747 return result;
2748 }
2749 else
2750 {
2751 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2752 }
2753 }
2754
2755 Type *Int2::getType()
2756 {
2757 return T(Type_v2i32);
2758 }
2759
2760 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
2761 {
2762 if(emulateIntrinsics)
2763 {
2764 UInt2 result;
2765 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
2766 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
2767
2768 return result;
2769 }
2770 else
2771 {
2772 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2773 }
2774 }
2775
2776 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
2777 {
2778 if(emulateIntrinsics)
2779 {
2780 UInt2 result;
2781 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
2782 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
2783
2784 return result;
2785 }
2786 else
2787 {
2788 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2789 }
2790 }
2791
2792 Type *UInt2::getType()
2793 {
2794 return T(Type_v2i32);
2795 }
2796
2797 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
2798 {
2799 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2800 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2801
2802 Value *e;
2803 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
2804 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2805 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
2806
2807 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2808 Value *d = Nucleus::createBitCast(c, Short8::getType());
2809 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
2810
2811 Value *f = Nucleus::createBitCast(e, Int4::getType());
2812 storeValue(f);
2813 }
2814
2815 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
2816 {
2817 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2818 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2819
2820 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2821 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2822 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
2823
2824 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
2825 Value *d = Nucleus::createBitCast(c, Short8::getType());
2826 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
2827
2828 *this = As<Int4>(e) >> 24;
2829 }
2830
2831 Int4::Int4(RValue<Short4> cast) : XYZW(this)
2832 {
2833 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
2834 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2835
2836 *this = As<Int4>(c) >> 16;
2837 }
2838
2839 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
2840 {
2841 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2842 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2843 Value *d = Nucleus::createBitCast(c, Int4::getType());
2844 storeValue(d);
2845 }
2846
2847 Int4::Int4(RValue<Int> rhs) : XYZW(this)
2848 {
2849 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
2850
2851 int swizzle[4] = {0, 0, 0, 0};
2852 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
2853
2854 storeValue(replicate);
2855 }
2856
2857 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
2858 {
2859 if(emulateIntrinsics)
2860 {
2861 Int4 result;
2862 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2863 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
2864 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
2865 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
2866
2867 return result;
2868 }
2869 else
2870 {
2871 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2872 }
2873 }
2874
2875 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
2876 {
2877 if(emulateIntrinsics)
2878 {
2879 Int4 result;
2880 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2881 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2882 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
2883 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
2884
2885 return result;
2886 }
2887 else
2888 {
2889 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2890 }
2891 }
2892
2893 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2894 {
2895 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
2896 }
2897
2898 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2899 {
2900 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
2901 }
2902
2903 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2904 {
2905 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
2906 }
2907
2908 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2909 {
2910 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
2911 }
2912
2913 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2914 {
2915 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
2916 }
2917
2918 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2919 {
2920 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
2921 }
2922
2923 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2924 {
2925 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
2926 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2927 ::basicBlock->appendInst(cmp);
2928
2929 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2930 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2931 ::basicBlock->appendInst(select);
2932
2933 return RValue<Int4>(V(result));
2934 }
2935
2936 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2937 {
2938 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
2939 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2940 ::basicBlock->appendInst(cmp);
2941
2942 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2943 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2944 ::basicBlock->appendInst(select);
2945
2946 return RValue<Int4>(V(result));
2947 }
2948
2949 RValue<Int4> RoundInt(RValue<Float4> cast)
2950 {
2951 if(emulateIntrinsics || CPUID::ARM)
2952 {
2953 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2954 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
2955 }
2956 else
2957 {
2958 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2959 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2960 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2961 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2962 nearbyint->addArg(cast.value);
2963 ::basicBlock->appendInst(nearbyint);
2964
2965 return RValue<Int4>(V(result));
2966 }
2967 }
2968
2969 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
2970 {
2971 if(emulateIntrinsics)
2972 {
2973 Short8 result;
2974 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2975 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2976 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2977 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2978 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2979 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2980 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2981 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2982
2983 return result;
2984 }
2985 else
2986 {
2987 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2988 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2989 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2990 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2991 pack->addArg(x.value);
2992 pack->addArg(y.value);
2993 ::basicBlock->appendInst(pack);
2994
2995 return RValue<Short8>(V(result));
2996 }
2997 }
2998
	// Packs the eight 32-bit lanes of x and y into one UShort8.
	// Uses the VectorPackUnsigned intrinsic when intrinsics are not emulated
	// and CPUID reports SSE4_1 or ARM; otherwise builds the result on top of
	// PackSigned.
	RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
	{
		if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
		{
			// (sx >> 31) is an arithmetic shift, so negative lanes yield an
			// all-ones mask; AND-ing with its complement zeroes those lanes.
			// Subtracting 0x8000 then biases the value so that PackSigned
			// can be used (presumably relying on its signed saturation to
			// clamp the upper end).
			RValue<Int4> sx = As<Int4>(x);
			RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);

			// Same treatment for the second operand.
			RValue<Int4> sy = As<Int4>(y);
			RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);

			// Undo the 0x8000 bias on the packed 16-bit lanes and
			// reinterpret the bits as unsigned.
			return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
		}
		else
		{
			// Emit the VectorPackUnsigned intrinsic with both operands.
			Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
			const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
			auto target = ::context->getConstantUndef(Ice::IceType_i32);
			auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
			pack->addArg(x.value);
			pack->addArg(y.value);
			::basicBlock->appendInst(pack);

			return RValue<UShort8>(V(result));
		}
	}
3024
3025 RValue<Int> SignMask(RValue<Int4> x)
3026 {
3027 if(emulateIntrinsics || CPUID::ARM)
3028 {
3029 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3030 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3031 }
3032 else
3033 {
3034 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
3035 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3036 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3037 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3038 movmsk->addArg(x.value);
3039 ::basicBlock->appendInst(movmsk);
3040
3041 return RValue<Int>(V(result));
3042 }
3043 }
3044
3045 Type *Int4::getType()
3046 {
3047 return T(Ice::IceType_v4i32);
3048 }
3049
	// Converts four floats to unsigned integers. Int4(cast) is used for
	// the actual conversion, so lanes at or above 2^31 are handled by
	// subtracting 2^31 before converting and adding it back afterwards.
	// Lanes whose sign bit is set produce 0.
	UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
	{
		// Smallest positive value representable in UInt, but not in Int
		const unsigned int ustart = 0x80000000u;
		const float ustartf = float(ustart);

		// Lane mask of the values that are NOT below ustart, i.e. the lanes
		// that are too large to be converted directly as an Int.
		Int4 uiValue = CmpNLT(cast, Float4(ustartf));
		// If the value is too large, subtract ustart and re-add it after conversion.
		uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
		// Otherwise, just convert normally
		          (~uiValue & Int4(cast));
		// If the value is negative, store 0, otherwise store the result of the conversion.
		// (As<Int4>(cast) >> 31) smears the float's sign bit across the lane.
		storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
	}
3065
3066 UInt4::UInt4(RValue<UInt> rhs) : XYZW(this)
3067 {
3068 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3069
3070 int swizzle[4] = {0, 0, 0, 0};
3071 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3072
3073 storeValue(replicate);
3074 }
3075
3076 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3077 {
3078 if(emulateIntrinsics)
3079 {
3080 UInt4 result;
3081 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3082 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3083 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3084 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
3085
3086 return result;
3087 }
3088 else
3089 {
3090 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3091 }
3092 }
3093
3094 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3095 {
3096 if(emulateIntrinsics)
3097 {
3098 UInt4 result;
3099 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3100 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3101 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3102 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
3103
3104 return result;
3105 }
3106 else
3107 {
3108 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3109 }
3110 }
3111
3112 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3113 {
3114 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3115 }
3116
3117 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3118 {
3119 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3120 }
3121
3122 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3123 {
3124 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3125 }
3126
3127 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3128 {
3129 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3130 }
3131
3132 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3133 {
3134 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3135 }
3136
3137 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3138 {
3139 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3140 }
3141
3142 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3143 {
3144 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3145 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3146 ::basicBlock->appendInst(cmp);
3147
3148 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3149 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3150 ::basicBlock->appendInst(select);
3151
3152 return RValue<UInt4>(V(result));
3153 }
3154
3155 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3156 {
3157 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3158 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3159 ::basicBlock->appendInst(cmp);
3160
3161 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3162 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3163 ::basicBlock->appendInst(select);
3164
3165 return RValue<UInt4>(V(result));
3166 }
3167
3168 Type *UInt4::getType()
3169 {
3170 return T(Ice::IceType_v4i32);
3171 }
3172
3173 Type *Half::getType()
3174 {
3175 return T(Ice::IceType_i16);
3176 }
3177
3178 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3179 {
3180 return 1.0f / x;
3181 }
3182
3183 RValue<Float> RcpSqrt_pp(RValue<Float> x)
3184 {
3185 return Rcp_pp(Sqrt(x));
3186 }
3187
3188 RValue<Float> Sqrt(RValue<Float> x)
3189 {
3190 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
3191 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3192 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3193 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3194 sqrt->addArg(x.value);
3195 ::basicBlock->appendInst(sqrt);
3196
3197 return RValue<Float>(V(result));
3198 }
3199
3200 RValue<Float> Round(RValue<Float> x)
3201 {
3202 return Float4(Round(Float4(x))).x;
3203 }
3204
3205 RValue<Float> Trunc(RValue<Float> x)
3206 {
3207 return Float4(Trunc(Float4(x))).x;
3208 }
3209
3210 RValue<Float> Frac(RValue<Float> x)
3211 {
3212 return Float4(Frac(Float4(x))).x;
3213 }
3214
3215 RValue<Float> Floor(RValue<Float> x)
3216 {
3217 return Float4(Floor(Float4(x))).x;
3218 }
3219
3220 RValue<Float> Ceil(RValue<Float> x)
3221 {
3222 return Float4(Ceil(Float4(x))).x;
3223 }
3224
3225 Type *Float::getType()
3226 {
3227 return T(Ice::IceType_f32);
3228 }
3229
3230 Type *Float2::getType()
3231 {
3232 return T(Type_v2f32);
3233 }
3234
3235 Float4::Float4(RValue<Float> rhs) : XYZW(this)
3236 {
3237 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3238
3239 int swizzle[4] = {0, 0, 0, 0};
3240 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3241
3242 storeValue(replicate);
3243 }
3244
3245 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3246 {
3247 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3248 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3249 ::basicBlock->appendInst(cmp);
3250
3251 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3252 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3253 ::basicBlock->appendInst(select);
3254
3255 return RValue<Float4>(V(result));
3256 }
3257
3258 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3259 {
3260 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3261 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3262 ::basicBlock->appendInst(cmp);
3263
3264 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3265 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3266 ::basicBlock->appendInst(select);
3267
3268 return RValue<Float4>(V(result));
3269 }
3270
3271 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3272 {
3273 return Float4(1.0f) / x;
3274 }
3275
3276 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3277 {
3278 return Rcp_pp(Sqrt(x));
3279 }
3280
3281 RValue<Float4> Sqrt(RValue<Float4> x)
3282 {
3283 if(emulateIntrinsics || CPUID::ARM)
3284 {
3285 Float4 result;
3286 result.x = Sqrt(Float(Float4(x).x));
3287 result.y = Sqrt(Float(Float4(x).y));
3288 result.z = Sqrt(Float(Float4(x).z));
3289 result.w = Sqrt(Float(Float4(x).w));
3290
3291 return result;
3292 }
3293 else
3294 {
3295 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3296 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3297 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3298 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3299 sqrt->addArg(x.value);
3300 ::basicBlock->appendInst(sqrt);
3301
3302 return RValue<Float4>(V(result));
3303 }
3304 }
3305
3306 RValue<Int> SignMask(RValue<Float4> x)
3307 {
3308 if(emulateIntrinsics || CPUID::ARM)
3309 {
3310 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3311 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3312 }
3313 else
3314 {
3315 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
3316 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3317 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3318 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3319 movmsk->addArg(x.value);
3320 ::basicBlock->appendInst(movmsk);
3321
3322 return RValue<Int>(V(result));
3323 }
3324 }
3325
3326 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3327 {
3328 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3329 }
3330
3331 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3332 {
3333 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3334 }
3335
3336 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3337 {
3338 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3339 }
3340
3341 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3342 {
3343 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3344 }
3345
3346 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3347 {
3348 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3349 }
3350
3351 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3352 {
3353 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
3354 }
3355
3356 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3357 {
3358 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3359 }
3360
3361 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3362 {
3363 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3364 }
3365
3366 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3367 {
3368 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3369 }
3370
3371 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3372 {
3373 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3374 }
3375
3376 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3377 {
3378 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3379 }
3380
3381 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3382 {
3383 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3384 }
3385
3386 RValue<Float4> Round(RValue<Float4> x)
3387 {
3388 if(emulateIntrinsics || CPUID::ARM)
3389 {
3390 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3391 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
3392 }
3393 else if(CPUID::SSE4_1)
3394 {
3395 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3396 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3397 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3398 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3399 round->addArg(x.value);
3400 round->addArg(::context->getConstantInt32(0));
3401 ::basicBlock->appendInst(round);
3402
3403 return RValue<Float4>(V(result));
3404 }
3405 else
3406 {
3407 return Float4(RoundInt(x));
3408 }
3409 }
3410
3411 RValue<Float4> Trunc(RValue<Float4> x)
3412 {
3413 if(CPUID::SSE4_1)
3414 {
3415 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3416 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3417 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3418 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3419 round->addArg(x.value);
3420 round->addArg(::context->getConstantInt32(3));
3421 ::basicBlock->appendInst(round);
3422
3423 return RValue<Float4>(V(result));
3424 }
3425 else
3426 {
3427 return Float4(Int4(x));
3428 }
3429 }
3430
3431 RValue<Float4> Frac(RValue<Float4> x)
3432 {
3433 Float4 frc;
3434
3435 if(CPUID::SSE4_1)
3436 {
3437 frc = x - Floor(x);
3438 }
3439 else
3440 {
3441 frc = x - Float4(Int4(x)); // Signed fractional part.
3442
3443 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
3444 }
3445
3446 // x - floor(x) can be 1.0 for very small negative x.
3447 // Clamp against the value just below 1.0.
3448 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
3449 }
3450
3451 RValue<Float4> Floor(RValue<Float4> x)
3452 {
3453 if(CPUID::SSE4_1)
3454 {
3455 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3456 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3457 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3458 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3459 round->addArg(x.value);
3460 round->addArg(::context->getConstantInt32(1));
3461 ::basicBlock->appendInst(round);
3462
3463 return RValue<Float4>(V(result));
3464 }
3465 else
3466 {
3467 return x - Frac(x);
3468 }
3469 }
3470
3471 RValue<Float4> Ceil(RValue<Float4> x)
3472 {
3473 if(CPUID::SSE4_1)
3474 {
3475 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3476 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3477 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3478 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3479 round->addArg(x.value);
3480 round->addArg(::context->getConstantInt32(2));
3481 ::basicBlock->appendInst(round);
3482
3483 return RValue<Float4>(V(result));
3484 }
3485 else
3486 {
3487 return -Floor(-x);
3488 }
3489 }
3490
3491 Type *Float4::getType()
3492 {
3493 return T(Ice::IceType_v4f32);
3494 }
3495
3496 RValue<Long> Ticks()
3497 {
3498 UNIMPLEMENTED("RValue<Long> Ticks()");
3499 return Long(Int(0));
3500 }
3501
3502 RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3503 {
3504 if (sizeof(void*) == 8)
3505 {
3506 return RValue<Pointer<Byte>>(V(::context->getConstantInt64(reinterpret_cast<intptr_t>(ptr))));
3507 }
3508 else
3509 {
3510 return RValue<Pointer<Byte>>(V(::context->getConstantInt32(reinterpret_cast<intptr_t>(ptr))));
3511 }
3512 }
3513
3514 RValue<Pointer<Byte>> ConstantData(void const * data, size_t size)
3515 {
3516 // TODO: Try to use Ice::VariableDeclaration::DataInitializer and
3517 // getConstantSym instead of tagging data on the routine.
3518 return ConstantPointer(::routine->addConstantData(data, size));
3519 }
3520
3521 Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value*> args, std::initializer_list<Type*> argTys)
3522 {
3523 // FIXME: This does not currently work on Windows.
3524 Ice::Variable *ret = nullptr;
3525 if (retTy != nullptr)
3526 {
3527 ret = ::function->makeVariable(T(retTy));
3528 }
3529 auto call = Ice::InstCall::create(::function, args.size(), ret, V(fptr.value), false);
3530 for (auto arg : args)
3531 {
3532 call->addArg(V(arg));
3533 }
3534 ::basicBlock->appendInst(call);
3535 return V(ret);
3536 }
3537
3538 void Breakpoint()
3539 {
3540 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3541 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3542 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3543 ::basicBlock->appendInst(trap);
3544 }
3545
	// Below are functions currently unimplemented for the Subzero backend.
	// They are stubbed to satisfy the linker.
	// Every stub invokes UNIMPLEMENTED with its own name; stubs that must
	// return something then return nullptr or a zero-constructed value.

	// Memory ordering and masked / gather / scatter memory access.
	void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); }
	Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createMaskedLoad()"); return nullptr; }
	void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createMaskedStore()"); }
	Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createGather()"); return nullptr; }
	void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createScatter()"); }
	// Scalar exponential / logarithm.
	RValue<Float> Exp2(RValue<Float> x) { UNIMPLEMENTED("Subzero Exp2()"); return Float(0); }
	RValue<Float> Log2(RValue<Float> x) { UNIMPLEMENTED("Subzero Log2()"); return Float(0); }
	// Vector trigonometric and hyperbolic functions.
	RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4(0); }
	RValue<Float4> Cos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cos()"); return Float4(0); }
	RValue<Float4> Tan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tan()"); return Float4(0); }
	RValue<Float4> Asin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asin()"); return Float4(0); }
	RValue<Float4> Acos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acos()"); return Float4(0); }
	RValue<Float4> Atan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atan()"); return Float4(0); }
	RValue<Float4> Sinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sinh()"); return Float4(0); }
	RValue<Float4> Cosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cosh()"); return Float4(0); }
	RValue<Float4> Tanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tanh()"); return Float4(0); }
	RValue<Float4> Asinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asinh()"); return Float4(0); }
	RValue<Float4> Acosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acosh()"); return Float4(0); }
	RValue<Float4> Atanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atanh()"); return Float4(0); }
	RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Atan2()"); return Float4(0); }
	// Vector power, exponential and logarithm.
	RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Pow()"); return Float4(0); }
	RValue<Float4> Exp(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp()"); return Float4(0); }
	RValue<Float4> Log(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log()"); return Float4(0); }
	RValue<Float4> Exp2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp2()"); return Float4(0); }
	RValue<Float4> Log2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log2()"); return Float4(0); }
	// Ctlz / Cttz, scalar and vector (presumably count leading / trailing zeros).
	RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Ctlz()"); return UInt(0); }
	RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Ctlz()"); return UInt4(0); }
	RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Cttz()"); return UInt(0); }
	RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Cttz()"); return UInt4(0); }
3577
	// Debug hooks. All three have empty bodies here, so calling them has
	// no effect.
	void EmitDebugLocation() {}
	void EmitDebugVariable(Value* value) {}
	void FlushDebug() {}
3581
	// Coroutine entry points. Each one only invokes UNIMPLEMENTED;
	// acquireCoroutine additionally returns a null routine pointer.
	void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params) { UNIMPLEMENTED("createCoroutine"); }
	std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) { UNIMPLEMENTED("acquireCoroutine"); return nullptr; }
	void Nucleus::yield(Value* val) { UNIMPLEMENTED("Yield"); }
3585
3586}
3587