SubzeroReactor.cpp source code [engine/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp]

1	// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#include "Reactor.hpp"
16	#include "Debug.hpp"
17
18	#include "Optimizer.hpp"
19	#include "ExecutableMemory.hpp"
20
21	#include "src/IceTypes.h"
22	#include "src/IceCfg.h"
23	#include "src/IceELFStreamer.h"
24	#include "src/IceGlobalContext.h"
25	#include "src/IceCfgNode.h"
26	#include "src/IceELFObjectWriter.h"
27	#include "src/IceGlobalInits.h"
28
29	#include "llvm/Support/FileSystem.h"
30	#include "llvm/Support/raw_os_ostream.h"
31	#include "llvm/Support/Compiler.h"
32
33	#if __has_feature(memory_sanitizer)
34	#include <sanitizer/msan_interface.h>
35	#endif
36
37	#if defined(_WIN32)
38	#ifndef WIN32_LEAN_AND_MEAN
39	#define WIN32_LEAN_AND_MEAN
40	#endif // !WIN32_LEAN_AND_MEAN
41	#ifndef NOMINMAX
42	#define NOMINMAX
43	#endif // !NOMINMAX
44	#include <Windows.h>
45	#else
46	#include <sys/mman.h>
47	#if !defined(MAP_ANONYMOUS)
48	#define MAP_ANONYMOUS MAP_ANON
49	#endif
50	#endif
51
52	#include <mutex>
53	#include <limits>
54	#include <iostream>
55
// Forward declaration: the file-local `routine` global holds a pointer to
// this class before the class itself is defined.
namespace rr
{
	class ELFMemoryStreamer;
}
60
61	namespace
62	{
63	// Default configuration settings. Must be accessed under mutex lock.
64	std::mutex defaultConfigLock;
65	rr::Config &defaultConfig()
66	{
67	// This uses a static in a function to avoid the cost of a global static
68	// initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
69	static rr::Config config = rr::Config::Edit ()
70	.set(rr::Optimization::Level::Default)
71	.apply({});
72	return config;
73	}
74
75	Ice::GlobalContext context = nullptr*;
76	Ice::Cfg function = nullptr*;
77	Ice::CfgNode basicBlock = nullptr*;
78	Ice::CfgLocalAllocatorScope allocator = nullptr*;
79	rr::ELFMemoryStreamer routine = nullptr*;
80
81	std::mutex codegenMutex;
82
83	Ice::ELFFileStreamer elfFile = nullptr*;
84	Ice::Fdstream out = nullptr*;
85	}
86
87	namespace
88	{
89	#if !defined(__i386__) && defined(_M_IX86)
90	#define __i386__ 1
91	#endif
92
93	#if !defined(__x86_64__) && (defined(_M_AMD64) \|\| defined (_M_X64))
94	#define __x86_64__ 1
95	#endif
96
97	static Ice::OptLevel toIce(rr::Optimization::Level level)
98	{
99	switch (level)
100	{
101	case rr::Optimization::Level::None: return Ice::Opt_0;
102	case rr::Optimization::Level::Less: return Ice::Opt_1;
103	case rr::Optimization::Level::Default: return Ice::Opt_2;
104	case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
105	default: UNREACHABLE("Unknown Optimization Level %d", int(level));
106	}
107	return Ice::Opt_2;
108	}
109
110	class CPUID
111	{
112	public:
113	const static bool ARM;
114	const static bool SSE4_1;
115
116	private:
117	static void cpuid(int registers[`4`], int info)
118	{
119	#if defined(__i386__) \|\| defined(__x86_64__)
120	#if defined(_WIN32)
121	__cpuid(registers, info);
122	#else
123	__asm volatile("cpuid": "=a" (registers[`0`]), "=b" (registers[`1`]), "=c" (registers[`2`]), "=d" (registers[`3`]): "a" (info));
124	#endif
125	#else
126	registers[`0`] = `0`;
127	registers[`1`] = `0`;
128	registers[`2`] = `0`;
129	registers[`3`] = `0`;
130	#endif
131	}
132
133	static bool detectARM()
134	{
135	#if defined(__arm__) \|\| defined(__aarch64__)
136	return true;
137	#elif defined(__i386__) \|\| defined(__x86_64__)
138	return false;
139	#elif defined(__mips__)
140	return false;
141	#else
142	#error "Unknown architecture"
143	#endif
144	}
145
146	static bool detectSSE4_1()
147	{
148	#if defined(__i386__) \|\| defined(__x86_64__)
149	int registers[`4`];
150	cpuid(registers, `1`);
151	return (registers[`2`] & `0x00080000`) != `0`;
152	#else
153	return false;
154	#endif
155	}
156	};
157
158	const bool CPUID::ARM = CPUID::detectARM();
159	const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
160	const bool emulateIntrinsics = false;
161	const bool emulateMismatchedBitCast = CPUID::ARM;
162	}
163
164	namespace rr
165	{
166	const Capabilities Caps =
167	{
168	false, // CoroutinesSupported
169	};
170
171	enum EmulatedType
172	{
173	EmulatedShift = `16`,
174	EmulatedV2 = `2` << EmulatedShift,
175	EmulatedV4 = `4` << EmulatedShift,
176	EmulatedV8 = `8` << EmulatedShift,
177	EmulatedBits = EmulatedV2 \| EmulatedV4 \| EmulatedV8,
178
179	Type_v2i32 = Ice::IceType_v4i32 \| EmulatedV2,
180	Type_v4i16 = Ice::IceType_v8i16 \| EmulatedV4,
181	Type_v2i16 = Ice::IceType_v8i16 \| EmulatedV2,
182	Type_v8i8 = Ice::IceType_v16i8 \| EmulatedV8,
183	Type_v4i8 = Ice::IceType_v16i8 \| EmulatedV4,
184	Type_v2f32 = Ice::IceType_v4f32 \| EmulatedV2,
185	};
186
187	class Value : public Ice::Operand {};
188	class SwitchCases : public Ice::InstSwitch {};
189	class BasicBlock : public Ice::CfgNode {};
190
191	Ice::Type T(Type *t)
192	{
193	static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
194	return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
195	}
196
197	Type *T(Ice::Type t)
198	{
199	return reinterpret_cast<Type*>(t);
200	}
201
202	Type *T(EmulatedType t)
203	{
204	return reinterpret_cast<Type*>(t);
205	}
206
207	Value V(Ice::Operand v)
208	{
209	return reinterpret_cast<Value*>(v);
210	}
211
212	BasicBlock B(Ice::CfgNode b)
213	{
214	return reinterpret_cast<BasicBlock*>(b);
215	}
216
217	static size_t typeSize(Type *type)
218	{
219	if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
220	{
221	switch(reinterpret_cast<std::intptr_t>(type))
222	{
223	case Type_v2i32: return `8`;
224	case Type_v4i16: return `8`;
225	case Type_v2i16: return `4`;
226	case Type_v8i8: return `8`;
227	case Type_v4i8: return `4`;
228	case Type_v2f32: return `8`;
229	default: ASSERT(false);
230	}
231	}
232
233	return Ice::typeWidthInBytes(T(type));
234	}
235
236	using ElfHeader = std::conditional<sizeof(void*) == `8`, Elf64_Ehdr, Elf32_Ehdr>::type;
237	using SectionHeader = std::conditional<sizeof(void*) == `8`, Elf64_Shdr, Elf32_Shdr>::type;
238
239	inline const SectionHeader sectionHeader(const* ElfHeader *elfHeader)
240	{
241	return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
242	}
243
244	inline const SectionHeader elfSection(const* ElfHeader elfHeader, int* index)
245	{
246	return &sectionHeader(elfHeader)[index];
247	}
248
249	static void relocateSymbol(const* ElfHeader elfHeader, const* Elf32_Rel &relocation, const SectionHeader &relocationTable)
250	{
251	const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
252
253	uint32_t index = relocation.getSymbol();
254	int table = relocationTable.sh_link;
255	void symbolValue = nullptr*;
256
257	if(index != SHN_UNDEF)
258	{
259	if(table == SHN_UNDEF) return nullptr;
260	const SectionHeader *symbolTable = elfSection(elfHeader, table);
261
262	uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
263	if(index >= symtab_entries)
264	{
265	ASSERT(index < symtab_entries && "Symbol Index out of range");
266	return nullptr;
267	}
268
269	intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
270	Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
271	uint16_t section = symbol.st_shndx;
272
273	if(section != SHN_UNDEF && section < SHN_LORESERVE)
274	{
275	const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
276	symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
277	}
278	else
279	{
280	return nullptr;
281	}
282	}
283
284	intptr_t address = (intptr_t)elfHeader + target->sh_offset;
285	unaligned_ptr<int32_t> patchSite = (int32_t*)(address + relocation.r_offset);
286
287	if(CPUID::ARM)
288	{
289	switch(relocation.getType())
290	{
291	case R_ARM_NONE:
292	// No relocation
293	break;
294	case R_ARM_MOVW_ABS_NC:
295	{
296	uint32_t thumb = `0`; // Calls to Thumb code not supported.
297	uint32_t lo = (uint32_t)(intptr_t)symbolValue \| thumb;
298	patchSite = (patchSite & `0xFFF0F000`) \| ((lo & `0xF000`) << `4`) \| (lo & `0x0FFF`);
299	}
300	break;
301	case R_ARM_MOVT_ABS:
302	{
303	uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> `16`;
304	patchSite = (patchSite & `0xFFF0F000`) \| ((hi & `0xF000`) << `4`) \| (hi & `0x0FFF`);
305	}
306	break;
307	default:
308	ASSERT(false && "Unsupported relocation type");
309	return nullptr;
310	}
311	}
312	else
313	{
314	switch(relocation.getType())
315	{
316	case R_386_NONE:
317	// No relocation
318	break;
319	case R_386_32:
320	patchSite = (int32_t)((intptr_t)symbolValue + patchSite);
321	break;
322	case R_386_PC32:
323	patchSite = (int32_t)((intptr_t)symbolValue + patchSite - (intptr_t)patchSite);
324	break;
325	default:
326	ASSERT(false && "Unsupported relocation type");
327	return nullptr;
328	}
329	}
330
331	return symbolValue;
332	}
333
334	static void relocateSymbol(const* ElfHeader elfHeader, const* Elf64_Rela &relocation, const SectionHeader &relocationTable)
335	{
336	const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
337
338	uint32_t index = relocation.getSymbol();
339	int table = relocationTable.sh_link;
340	void symbolValue = nullptr*;
341
342	if(index != SHN_UNDEF)
343	{
344	if(table == SHN_UNDEF) return nullptr;
345	const SectionHeader *symbolTable = elfSection(elfHeader, table);
346
347	uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
348	if(index >= symtab_entries)
349	{
350	ASSERT(index < symtab_entries && "Symbol Index out of range");
351	return nullptr;
352	}
353
354	intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
355	Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
356	uint16_t section = symbol.st_shndx;
357
358	if(section != SHN_UNDEF && section < SHN_LORESERVE)
359	{
360	const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
361	symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
362	}
363	else
364	{
365	return nullptr;
366	}
367	}
368
369	intptr_t address = (intptr_t)elfHeader + target->sh_offset;
370	unaligned_ptr<int32_t> patchSite32 = (int32_t*)(address + relocation.r_offset);
371	unaligned_ptr<int64_t> patchSite64 = (int64_t*)(address + relocation.r_offset);
372
373	switch(relocation.getType())
374	{
375	case R_X86_64_NONE:
376	// No relocation
377	break;
378	case R_X86_64_64:
379	patchSite64 = (int64_t)((intptr_t)symbolValue + patchSite64 + relocation.r_addend);
380	break;
381	case R_X86_64_PC32:
382	patchSite32 = (int32_t)((intptr_t)symbolValue + patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
383	break;
384	case R_X86_64_32S:
385	patchSite32 = (int32_t)((intptr_t)symbolValue + patchSite32 + relocation.r_addend);
386	break;
387	default:
388	ASSERT(false && "Unsupported relocation type");
389	return nullptr;
390	}
391
392	return symbolValue;
393	}
394
395	void loadImage(uint8_t const elfImage, size_t &codeSize)
396	{
397	ElfHeader elfHeader = (ElfHeader)elfImage;
398
399	if(!elfHeader->checkMagic())
400	{
401	return nullptr;
402	}
403
404	// Expect ELF bitness to match platform
405	ASSERT(sizeof(void*) == `8` ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
406	#if defined(__i386__)
407	ASSERT(sizeof(void*) == `4` && elfHeader->e_machine == EM_386);
408	#elif defined(__x86_64__)
409	ASSERT(sizeof(void*) == `8` && elfHeader->e_machine == EM_X86_64);
410	#elif defined(__arm__)
411	ASSERT(sizeof(void*) == `4` && elfHeader->e_machine == EM_ARM);
412	#elif defined(__aarch64__)
413	ASSERT(sizeof(void*) == `8` && elfHeader->e_machine == EM_AARCH64);
414	#elif defined(__mips__)
415	ASSERT(sizeof(void*) == `4` && elfHeader->e_machine == EM_MIPS);
416	#else
417	#error "Unsupported platform"
418	#endif
419
420	SectionHeader sectionHeader = (SectionHeader)(elfImage + elfHeader->e_shoff);
421	void entry = nullptr*;
422
423	for(int i = `0`; i < elfHeader->e_shnum; i++)
424	{
425	if(sectionHeader[i].sh_type == SHT_PROGBITS)
426	{
427	if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
428	{
429	entry = elfImage + sectionHeader[i].sh_offset;
430	codeSize = sectionHeader[i].sh_size;
431	}
432	}
433	else if(sectionHeader[i].sh_type == SHT_REL)
434	{
435	ASSERT(sizeof(void) == `4` && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code*
436
437	for(Elf32_Word index = `0`; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
438	{
439	const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
440	relocateSymbol(elfHeader, relocation, sectionHeader[i]);
441	}
442	}
443	else if(sectionHeader[i].sh_type == SHT_RELA)
444	{
445	ASSERT(sizeof(void) == `8` && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code*
446
447	for(Elf32_Word index = `0`; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
448	{
449	const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
450	relocateSymbol(elfHeader, relocation, sectionHeader[i]);
451	}
452	}
453	}
454
455	return entry;
456	}
457
458	template<typename T>
459	struct ExecutableAllocator
460	{
461	ExecutableAllocator() {}
462	template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {}
463
464	using value_type = T;
465	using size_type = std::size_t;
466
467	T *allocate(size_type n)
468	{
469	return (T)allocateExecutable(sizeof(T) n);
470	}
471
472	void deallocate(T *p, size_type n)
473	{
474	deallocateExecutable(p, sizeof(T) * n);
475	}
476	};
477
478	class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
479	{
480	ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
481	ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
482
483	public:
484	ELFMemoryStreamer() : Routine (), entry(nullptr)
485	{
486	position = `0`;
487	buffer.reserve(`0x1000`);
488	}
489
490	~ELFMemoryStreamer() override
491	{
492	#if defined(_WIN32)
493	if(buffer.size() != `0`)
494	{
495	DWORD exeProtection;
496	VirtualProtect(&buffer[`0`], buffer.size(), oldProtection, &exeProtection);
497	}
498	#endif
499	}
500
501	void write8(uint8_t Value) override
502	{
503	if(position == (uint64_t)buffer.size())
504	{
505	buffer.push_back(Value);
506	position++;
507	}
508	else if(position < (uint64_t)buffer.size())
509	{
510	buffer [position] = Value;
511	position++;
512	}
513	else ASSERT(false && "UNIMPLEMENTED");
514	}
515
516	void writeBytes(llvm::StringRef Bytes) override
517	{
518	std::size_t oldSize = buffer.size();
519	buffer.resize(oldSize + Bytes.size());
520	memcpy(&buffer [oldSize], Bytes.begin(), Bytes.size());
521	position += Bytes.size();
522	}
523
524	uint64_t tell() const override { return position; }
525
526	void seek(uint64_t Off) override { position = Off; }
527
528	const void getEntry(int* index) override
529	{
530	ASSERT(index == `0`); // Subzero does not support multiple entry points per routine yet.
531	if(!entry)
532	{
533	position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
534
535	size_t codeSize = `0`;
536	entry = loadImage(&buffer [`0`], codeSize);
537
538	#if defined(_WIN32)
539	VirtualProtect(&buffer[`0`], buffer.size(), PAGE_EXECUTE_READ, &oldProtection);
540	FlushInstructionCache(GetCurrentProcess(), NULL, `0`);
541	#else
542	mprotect(&buffer [`0`], buffer.size(), PROT_READ \| PROT_EXEC);
543	__builtin___clear_cache((char)entry, (char**)entry + codeSize);
544	#endif
545	}
546
547	return entry;
548	}
549
550	const void* addConstantData(const void* data, size_t size)
551	{
552	auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[size]);
553	memcpy(buf.get(), data, size);
554	auto ptr = buf.get();
555	constantData.emplace_back(std::move(buf));
556	return ptr;
557	}
558
559	private:
560	void *entry;
561	std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
562	std::size_t position;
563	std::vector<std::unique_ptr<uint8_t[]>> constantData;
564
565	#if defined(_WIN32)
566	DWORD oldProtection;
567	#endif
568	};
569
570	Nucleus::Nucleus()
571	{
572	::codegenMutex.lock(); // Reactor is currently not thread safe
573
574	Ice::ClFlags &Flags = Ice::ClFlags::Flags;
575	Ice::ClFlags::getParsedClFlags(Flags);
576
577	#if defined(__arm__)
578	Flags.setTargetArch(Ice::Target_ARM32);
579	Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
580	#elif defined(__mips__)
581	Flags.setTargetArch(Ice::Target_MIPS32);
582	Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
583	#else // x86
584	Flags.setTargetArch(sizeof(void*) == `8` ? Ice::Target_X8664 : Ice::Target_X8632);
585	Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
586	#endif
587	Flags.setOutFileType(Ice::FT_Elf);
588	Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
589	Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
590	Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None);
591	Flags.setDisableHybridAssembly(true);
592
593	static llvm::raw_os_ostream cout(std::cout);
594	static llvm::raw_os_ostream cerr(std::cerr);
595
596	if(false) // Write out to a file
597	{
598	std::error_code errorCode;
599	::out = new Ice::Fdstream ("out.o", errorCode, llvm::sys::fs::F_None);
600	::elfFile = new Ice::ELFFileStreamer (*out);
601	::context = new Ice::GlobalContext (&cout, &cout, &cerr, elfFile);
602	}
603	else
604	{
605	ELFMemoryStreamer elfMemory = new* ELFMemoryStreamer ();
606	::context = new Ice::GlobalContext (&cout, &cout, &cerr, elfMemory);
607	::routine = elfMemory;
608	}
609	}
610
611	Nucleus::~Nucleus()
612	{
613	delete ::routine;
614
615	delete ::allocator;
616	delete ::function;
617	delete ::context;
618
619	delete ::elfFile;
620	delete ::out;
621
622	::codegenMutex.unlock();
623	}
624
625	void Nucleus::setDefaultConfig(const Config &cfg)
626	{
627	std::unique_lock<std::mutex> lock(::defaultConfigLock);
628	::defaultConfig() = cfg;
629	}
630
631	void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
632	{
633	std::unique_lock<std::mutex> lock(::defaultConfigLock);
634	auto &config = ::defaultConfig();
635	config = cfgEdit.apply(config);
636	}
637
638	Config Nucleus::getDefaultConfig()
639	{
640	std::unique_lock<std::mutex> lock(::defaultConfigLock);
641	return ::defaultConfig();
642	}
643
644	std::shared_ptr<Routine> Nucleus::acquireRoutine(const char name, const* Config::Edit &cfgEdit / = Config::Edit::None /)
645	{
646	if(basicBlock->getInsts().empty() \|\| basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
647	{
648	createRetVoid();
649	}
650
651	::function->setFunctionName(Ice::GlobalString::createWithString(::context, name));
652
653	rr::optimize(::function);
654
655	::function->translate();
656	ASSERT(!::function->hasError());
657
658	auto globals = ::function->getGlobalInits();
659
660	if(globals && !globals ->empty())
661	{
662	::context->getGlobals()->merge(globals.get());
663	}
664
665	::context->emitFileHeader();
666	::function->emitIAS();
667	auto assembler = ::function->releaseAssembler();
668	auto objectWriter = ::context->getObjectWriter();
669	assembler ->alignFunction();
670	objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
671	::context->lowerGlobals("last");
672	::context->lowerConstants();
673	::context->lowerJumpTables();
674	objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
675	objectWriter->writeNonUserSections();
676
677	Routine *handoffRoutine = ::routine;
678	::routine = nullptr;
679
680	return std::shared_ptr<Routine>(handoffRoutine);
681	}
682
683	Value Nucleus::allocateStackVariable(Type t, int arraySize)
684	{
685	Ice::Type type = T(t);
686	int typeSize = Ice::typeWidthInBytes(type);
687	int totalSize = typeSize * (arraySize ? arraySize : `1`);
688
689	auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
690	auto address = ::function->makeVariable(T(getPointerType(t)));
691	auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
692	::function->getEntryNode()->getInsts().push_front(alloca);
693
694	return V(address);
695	}
696
697	BasicBlock *Nucleus::createBasicBlock()
698	{
699	return B(::function->makeNode());
700	}
701
702	BasicBlock *Nucleus::getInsertBlock()
703	{
704	return B(::basicBlock);
705	}
706
707	void Nucleus::setInsertBlock(BasicBlock *basicBlock)
708	{
709	// ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
710
711	Variable::materializeAll();
712
713	::basicBlock = basicBlock;
714	}
715
716	void Nucleus::createFunction(Type ReturnType, std::vector<Type> &Params)
717	{
718	uint32_t sequenceNumber = `0`;
719	::function = Ice::Cfg::create(::context, sequenceNumber).release();
720	::allocator = new Ice::CfgLocalAllocatorScope (::function);
721
722	for(Type *type : Params)
723	{
724	Ice::Variable *arg = ::function->makeVariable(T(type));
725	::function->addArg(arg);
726	}
727
728	Ice::CfgNode *node = ::function->makeNode();
729	::function->setEntryNode(node);
730	::basicBlock = node;
731	}
732
733	Value Nucleus::getArgument(unsigned* int index)
734	{
735	return V(::function->getArgs()[index]);
736	}
737
738	void Nucleus::createRetVoid()
739	{
740	// Code generated after this point is unreachable, so any variables
741	// being read can safely return an undefined value. We have to avoid
742	// materializing variables after the terminator ret instruction.
743	Variable::killUnmaterialized();
744
745	Ice::InstRet *ret = Ice::InstRet::create(::function);
746	::basicBlock->appendInst(ret);
747	}
748
749	void Nucleus::createRet(Value *v)
750	{
751	// Code generated after this point is unreachable, so any variables
752	// being read can safely return an undefined value. We have to avoid
753	// materializing variables after the terminator ret instruction.
754	Variable::killUnmaterialized();
755
756	Ice::InstRet *ret = Ice::InstRet::create(::function, v);
757	::basicBlock->appendInst(ret);
758	}
759
760	void Nucleus::createBr(BasicBlock *dest)
761	{
762	Variable::materializeAll();
763
764	auto br = Ice::InstBr::create(::function, dest);
765	::basicBlock->appendInst(br);
766	}
767
768	void Nucleus::createCondBr(Value cond, BasicBlock ifTrue, BasicBlock *ifFalse)
769	{
770	Variable::materializeAll();
771
772	auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
773	::basicBlock->appendInst(br);
774	}
775
776	static bool isCommutative(Ice::InstArithmetic::OpKind op)
777	{
778	switch(op)
779	{
780	case Ice::InstArithmetic::Add:
781	case Ice::InstArithmetic::Fadd:
782	case Ice::InstArithmetic::Mul:
783	case Ice::InstArithmetic::Fmul:
784	case Ice::InstArithmetic::And:
785	case Ice::InstArithmetic::Or:
786	case Ice::InstArithmetic::Xor:
787	return true;
788	default:
789	return false;
790	}
791	}
792
793	static Value createArithmetic(Ice::InstArithmetic::OpKind op, Value lhs, Value *rhs)
794	{
795	ASSERT(lhs->getType() == rhs->getType() \|\| llvm::isa<Ice::Constant>(rhs));
796
797	bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
798
799	Ice::Variable *result = ::function->makeVariable(lhs->getType());
800	Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
801	::basicBlock->appendInst(arithmetic);
802
803	return V(result);
804	}
805
806	Value Nucleus::createAdd(Value lhs, Value *rhs)
807	{
808	return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
809	}
810
811	Value Nucleus::createSub(Value lhs, Value *rhs)
812	{
813	return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
814	}
815
816	Value Nucleus::createMul(Value lhs, Value *rhs)
817	{
818	return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
819	}
820
821	Value Nucleus::createUDiv(Value lhs, Value *rhs)
822	{
823	return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
824	}
825
826	Value Nucleus::createSDiv(Value lhs, Value *rhs)
827	{
828	return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
829	}
830
831	Value Nucleus::createFAdd(Value lhs, Value *rhs)
832	{
833	return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
834	}
835
836	Value Nucleus::createFSub(Value lhs, Value *rhs)
837	{
838	return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
839	}
840
841	Value Nucleus::createFMul(Value lhs, Value *rhs)
842	{
843	return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
844	}
845
846	Value Nucleus::createFDiv(Value lhs, Value *rhs)
847	{
848	return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
849	}
850
851	Value Nucleus::createURem(Value lhs, Value *rhs)
852	{
853	return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
854	}
855
856	Value Nucleus::createSRem(Value lhs, Value *rhs)
857	{
858	return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
859	}
860
861	Value Nucleus::createFRem(Value lhs, Value *rhs)
862	{
863	return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
864	}
865
866	Value Nucleus::createShl(Value lhs, Value *rhs)
867	{
868	return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
869	}
870
871	Value Nucleus::createLShr(Value lhs, Value *rhs)
872	{
873	return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
874	}
875
876	Value Nucleus::createAShr(Value lhs, Value *rhs)
877	{
878	return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
879	}
880
881	Value Nucleus::createAnd(Value lhs, Value *rhs)
882	{
883	return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
884	}
885
886	Value Nucleus::createOr(Value lhs, Value *rhs)
887	{
888	return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
889	}
890
891	Value Nucleus::createXor(Value lhs, Value *rhs)
892	{
893	return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
894	}
895
896	Value Nucleus::createNeg(Value v)
897	{
898	return createSub(createNullValue(T(v->getType())), v);
899	}
900
901	Value Nucleus::createFNeg(Value v)
902	{
903	double c[`4`] = {-`0.0`, -`0.0`, -`0.0`, -`0.0`};
904	Value *negativeZero = Ice::isVectorType(v->getType()) ?
905	createConstantVector(c, T(v->getType())) :
906	V(::context->getConstantFloat(-`0.0f`));
907
908	return createFSub(negativeZero, v);
909	}
910
911	Value Nucleus::createNot(Value v)
912	{
913	if(Ice::isScalarIntegerType(v->getType()))
914	{
915	return createXor(v, V(::context->getConstantInt(v->getType(), -`1`)));
916	}
917	else // Vector
918	{
919	int64_t c[`16`] = {-`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`};
920	return createXor(v, createConstantVector(c, T(v->getType())));
921	}
922	}
923
924	Value Nucleus::createLoad(Value ptr, Type type, bool* isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
925	{
926	ASSERT(!atomic); // Unimplemented
927	ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
928
929	int valueType = (int)reinterpret_cast<intptr_t>(type);
930	Ice::Variable *result = ::function->makeVariable(T(type));
931
932	if((valueType & EmulatedBits) && (align != `0`)) // Narrow vector not stored on stack.
933	{
934	if(emulateIntrinsics)
935	{
936	if(typeSize(type) == `4`)
937	{
938	auto pointer = RValue<Pointer<Byte>>(ptr);
939	Int x = *Pointer<Int>(pointer);
940
941	Int4 vector;
942	vector = Insert(vector, x, `0`);
943
944	auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
945	::basicBlock->appendInst(bitcast);
946	}
947	else if(typeSize(type) == `8`)
948	{
949	auto pointer = RValue<Pointer<Byte>>(ptr);
950	Int x = *Pointer<Int>(pointer);
951	Int y = *Pointer<Int>(pointer + `4`);
952
953	Int4 vector;
954	vector = Insert(vector, x, `0`);
955	vector = Insert(vector, y, `1`);
956
957	auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
958	::basicBlock->appendInst(bitcast);
959	}
960	else UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
961	}
962	else
963	{
964	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
965	auto target = ::context->getConstantUndef(Ice::IceType_i32);
966	auto load = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
967	load->addArg(ptr);
968	load->addArg(::context->getConstantInt32(typeSize(type)));
969	::basicBlock->appendInst(load);
970	}
971	}
972	else
973	{
974	auto load = Ice::InstLoad::create(::function, result, ptr, align);
975	::basicBlock->appendInst(load);
976	}
977
978	return V(result);
979	}
980
981	Value Nucleus::createStore(Value value, Value ptr, Type type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
982	{
983	ASSERT(!atomic); // Unimplemented
984	ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
985
986	#if __has_feature(memory_sanitizer)
987	// Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
988	if(align != `0`)
989	{
990	auto call = Ice::InstCall::create(::function, `2`, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
991	call->addArg(ptr);
992	call->addArg(::context->getConstantInt64(typeSize(type)));
993	::basicBlock->appendInst(call);
994	}
995	#endif
996
997	int valueType = (int)reinterpret_cast<intptr_t>(type);
998
999	if((valueType & EmulatedBits) && (align != `0`)) // Narrow vector not stored on stack.
1000	{
1001	if(emulateIntrinsics)
1002	{
1003	if(typeSize(type) == `4`)
1004	{
1005	Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1006	auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1007	::basicBlock->appendInst(bitcast);
1008
1009	RValue<Int4> v(V(vector));
1010
1011	auto pointer = RValue<Pointer<Byte>>(ptr);
1012	Int x = Extract(v, `0`);
1013	*Pointer<Int>(pointer) = x;
1014	}
1015	else if(typeSize(type) == `8`)
1016	{
1017	Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1018	auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1019	::basicBlock->appendInst(bitcast);
1020
1021	RValue<Int4> v(V(vector));
1022
1023	auto pointer = RValue<Pointer<Byte>>(ptr);
1024	Int x = Extract(v, `0`);
1025	*Pointer<Int>(pointer) = x;
1026	Int y = Extract(v, `1`);
1027	*Pointer<Int>(pointer + `4`) = y;
1028	}
1029	else UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
1030	}
1031	else
1032	{
1033	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
1034	auto target = ::context->getConstantUndef(Ice::IceType_i32);
1035	auto store = Ice::InstIntrinsicCall::create(::function, `3`, nullptr, target, intrinsic);
1036	store->addArg(value);
1037	store->addArg(ptr);
1038	store->addArg(::context->getConstantInt32(typeSize(type)));
1039	::basicBlock->appendInst(store);
1040	}
1041	}
1042	else
1043	{
1044	ASSERT(value->getType() == T(type));
1045
1046	auto store = Ice::InstStore::create(::function, value, ptr, align);
1047	::basicBlock->appendInst(store);
1048	}
1049
1050	return value;
1051	}
1052
1053	Value Nucleus::createGEP(Value ptr, Type type, Value index, bool unsignedIndex)
1054	{
1055	ASSERT(index->getType() == Ice::IceType_i32);
1056
1057	if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1058	{
1059	int32_t offset = constant->getValue() * (int)typeSize(type);
1060
1061	if(offset == `0`)
1062	{
1063	return ptr;
1064	}
1065
1066	return createAdd(ptr, createConstantInt(offset));
1067	}
1068
1069	if(!Ice::isByteSizedType(T(type)))
1070	{
1071	index = createMul(index, createConstantInt((int)typeSize(type)));
1072	}
1073
1074	if(sizeof(void*) == `8`)
1075	{
1076	if(unsignedIndex)
1077	{
1078	index = createZExt(index, T(Ice::IceType_i64));
1079	}
1080	else
1081	{
1082	index = createSExt(index, T(Ice::IceType_i64));
1083	}
1084	}
1085
1086	return createAdd(ptr, index);
1087	}
1088
1089	Value Nucleus::createAtomicAdd(Value ptr, Value *value, std::memory_order memoryOrder)
1090	{
1091	UNIMPLEMENTED("createAtomicAdd");
1092	return nullptr;
1093	}
1094
1095	Value Nucleus::createAtomicSub(Value ptr, Value *value, std::memory_order memoryOrder)
1096	{
1097	UNIMPLEMENTED("createAtomicSub");
1098	return nullptr;
1099	}
1100
1101	Value Nucleus::createAtomicAnd(Value ptr, Value *value, std::memory_order memoryOrder)
1102	{
1103	UNIMPLEMENTED("createAtomicAnd");
1104	return nullptr;
1105	}
1106
1107	Value Nucleus::createAtomicOr(Value ptr, Value *value, std::memory_order memoryOrder)
1108	{
1109	UNIMPLEMENTED("createAtomicOr");
1110	return nullptr;
1111	}
1112
1113	Value Nucleus::createAtomicXor(Value ptr, Value *value, std::memory_order memoryOrder)
1114	{
1115	UNIMPLEMENTED("createAtomicXor");
1116	return nullptr;
1117	}
1118
1119	Value Nucleus::createAtomicMin(Value ptr, Value *value, std::memory_order memoryOrder)
1120	{
1121	UNIMPLEMENTED("createAtomicMin");
1122	return nullptr;
1123	}
1124
1125	Value Nucleus::createAtomicMax(Value ptr, Value *value, std::memory_order memoryOrder)
1126	{
1127	UNIMPLEMENTED("createAtomicMax");
1128	return nullptr;
1129	}
1130
1131	Value Nucleus::createAtomicUMin(Value ptr, Value *value, std::memory_order memoryOrder)
1132	{
1133	UNIMPLEMENTED("createAtomicUMin");
1134	return nullptr;
1135	}
1136
1137	Value Nucleus::createAtomicUMax(Value ptr, Value *value, std::memory_order memoryOrder)
1138	{
1139	UNIMPLEMENTED("createAtomicUMax");
1140	return nullptr;
1141	}
1142
1143	Value Nucleus::createAtomicExchange(Value ptr, Value *value, std::memory_order memoryOrder)
1144	{
1145	UNIMPLEMENTED("createAtomicExchange");
1146	return nullptr;
1147	}
1148
1149	Value Nucleus::createAtomicCompareExchange(Value ptr, Value value, Value compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1150	{
1151	UNIMPLEMENTED("createAtomicCompareExchange");
1152	return nullptr;
1153	}
1154
1155	static Value createCast(Ice::InstCast::OpKind op, Value v, Type *destType)
1156	{
1157	if(v->getType() == T(destType))
1158	{
1159	return v;
1160	}
1161
1162	Ice::Variable *result = ::function->makeVariable(T(destType));
1163	Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1164	::basicBlock->appendInst(cast);
1165
1166	return V(result);
1167	}
1168
1169	Value Nucleus::createTrunc(Value v, Type *destType)
1170	{
1171	return createCast(Ice::InstCast::Trunc, v, destType);
1172	}
1173
1174	Value Nucleus::createZExt(Value v, Type *destType)
1175	{
1176	return createCast(Ice::InstCast::Zext, v, destType);
1177	}
1178
1179	Value Nucleus::createSExt(Value v, Type *destType)
1180	{
1181	return createCast(Ice::InstCast::Sext, v, destType);
1182	}
1183
1184	Value Nucleus::createFPToSI(Value v, Type *destType)
1185	{
1186	return createCast(Ice::InstCast::Fptosi, v, destType);
1187	}
1188
1189	Value Nucleus::createSIToFP(Value v, Type *destType)
1190	{
1191	return createCast(Ice::InstCast::Sitofp, v, destType);
1192	}
1193
1194	Value Nucleus::createFPTrunc(Value v, Type *destType)
1195	{
1196	return createCast(Ice::InstCast::Fptrunc, v, destType);
1197	}
1198
1199	Value Nucleus::createFPExt(Value v, Type *destType)
1200	{
1201	return createCast(Ice::InstCast::Fpext, v, destType);
1202	}
1203
1204	Value Nucleus::createBitCast(Value v, Type *destType)
1205	{
1206	// Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1207	// support for casting between scalars and wide vectors. For platforms where this is not supported,
1208	// emulate them by writing to the stack and reading back as the destination type.
1209	if(emulateMismatchedBitCast)
1210	{
1211	if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1212	{
1213	Value *address = allocateStackVariable(destType);
1214	createStore(v, address, T(v->getType()));
1215	return createLoad(address, destType);
1216	}
1217	else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1218	{
1219	Value *address = allocateStackVariable(T(v->getType()));
1220	createStore(v, address, T(v->getType()));
1221	return createLoad(address, destType);
1222	}
1223	}
1224
1225	return createCast(Ice::InstCast::Bitcast, v, destType);
1226	}
1227
1228	static Value createIntCompare(Ice::InstIcmp::ICond condition, Value lhs, Value *rhs)
1229	{
1230	ASSERT(lhs->getType() == rhs->getType());
1231
1232	auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1233	auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1234	::basicBlock->appendInst(cmp);
1235
1236	return V(result);
1237	}
1238
1239	Value Nucleus::createPtrEQ(Value lhs, Value *rhs)
1240	{
1241	return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1242	}
1243
1244	Value Nucleus::createICmpEQ(Value lhs, Value *rhs)
1245	{
1246	return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1247	}
1248
1249	Value Nucleus::createICmpNE(Value lhs, Value *rhs)
1250	{
1251	return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1252	}
1253
1254	Value Nucleus::createICmpUGT(Value lhs, Value *rhs)
1255	{
1256	return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1257	}
1258
1259	Value Nucleus::createICmpUGE(Value lhs, Value *rhs)
1260	{
1261	return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1262	}
1263
1264	Value Nucleus::createICmpULT(Value lhs, Value *rhs)
1265	{
1266	return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1267	}
1268
1269	Value Nucleus::createICmpULE(Value lhs, Value *rhs)
1270	{
1271	return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1272	}
1273
1274	Value Nucleus::createICmpSGT(Value lhs, Value *rhs)
1275	{
1276	return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1277	}
1278
1279	Value Nucleus::createICmpSGE(Value lhs, Value *rhs)
1280	{
1281	return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1282	}
1283
1284	Value Nucleus::createICmpSLT(Value lhs, Value *rhs)
1285	{
1286	return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1287	}
1288
1289	Value Nucleus::createICmpSLE(Value lhs, Value *rhs)
1290	{
1291	return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1292	}
1293
1294	static Value createFloatCompare(Ice::InstFcmp::FCond condition, Value lhs, Value *rhs)
1295	{
1296	ASSERT(lhs->getType() == rhs->getType());
1297	ASSERT(Ice::isScalarFloatingType(lhs->getType()) \|\| lhs->getType() == Ice::IceType_v4f32);
1298
1299	auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1300	auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1301	::basicBlock->appendInst(cmp);
1302
1303	return V(result);
1304	}
1305
1306	Value Nucleus::createFCmpOEQ(Value lhs, Value *rhs)
1307	{
1308	return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1309	}
1310
1311	Value Nucleus::createFCmpOGT(Value lhs, Value *rhs)
1312	{
1313	return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1314	}
1315
1316	Value Nucleus::createFCmpOGE(Value lhs, Value *rhs)
1317	{
1318	return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1319	}
1320
1321	Value Nucleus::createFCmpOLT(Value lhs, Value *rhs)
1322	{
1323	return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1324	}
1325
1326	Value Nucleus::createFCmpOLE(Value lhs, Value *rhs)
1327	{
1328	return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1329	}
1330
1331	Value Nucleus::createFCmpONE(Value lhs, Value *rhs)
1332	{
1333	return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1334	}
1335
1336	Value Nucleus::createFCmpORD(Value lhs, Value *rhs)
1337	{
1338	return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1339	}
1340
1341	Value Nucleus::createFCmpUNO(Value lhs, Value *rhs)
1342	{
1343	return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1344	}
1345
1346	Value Nucleus::createFCmpUEQ(Value lhs, Value *rhs)
1347	{
1348	return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1349	}
1350
1351	Value Nucleus::createFCmpUGT(Value lhs, Value *rhs)
1352	{
1353	return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1354	}
1355
1356	Value Nucleus::createFCmpUGE(Value lhs, Value *rhs)
1357	{
1358	return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1359	}
1360
1361	Value Nucleus::createFCmpULT(Value lhs, Value *rhs)
1362	{
1363	return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1364	}
1365
1366	Value Nucleus::createFCmpULE(Value lhs, Value *rhs)
1367	{
1368	return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1369	}
1370
1371	Value Nucleus::createFCmpUNE(Value lhs, Value *rhs)
1372	{
1373	return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1374	}
1375
1376	Value Nucleus::createExtractElement(Value vector, Type type, int* index)
1377	{
1378	auto result = ::function->makeVariable(T(type));
1379	auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1380	::basicBlock->appendInst(extract);
1381
1382	return V(result);
1383	}
1384
1385	Value Nucleus::createInsertElement(Value vector, Value element, int* index)
1386	{
1387	auto result = ::function->makeVariable(vector->getType());
1388	auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1389	::basicBlock->appendInst(insert);
1390
1391	return V(result);
1392	}
1393
1394	Value Nucleus::createShuffleVector(Value V1, Value V2, const* int *select)
1395	{
1396	ASSERT(V1->getType() == V2->getType());
1397
1398	int size = Ice::typeNumElements(V1->getType());
1399	auto result = ::function->makeVariable(V1->getType());
1400	auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1401
1402	for(int i = `0`; i < size; i++)
1403	{
1404	shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1405	}
1406
1407	::basicBlock->appendInst(shuffle);
1408
1409	return V(result);
1410	}
1411
1412	Value Nucleus::createSelect(Value C, Value ifTrue, Value ifFalse)
1413	{
1414	ASSERT(ifTrue->getType() == ifFalse->getType());
1415
1416	auto result = ::function->makeVariable(ifTrue->getType());
1417	auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1418	::basicBlock->appendInst(select);
1419
1420	return V(result);
1421	}
1422
1423	SwitchCases Nucleus::createSwitch(Value control, BasicBlock defaultBranch, unsigned* numCases)
1424	{
1425	auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1426	::basicBlock->appendInst(switchInst);
1427
1428	return reinterpret_cast<SwitchCases*>(switchInst);
1429	}
1430
1431	void Nucleus::addSwitchCase(SwitchCases switchCases, int* label, BasicBlock *branch)
1432	{
1433	switchCases->addBranch(label, label, branch);
1434	}
1435
1436	void Nucleus::createUnreachable()
1437	{
1438	Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1439	::basicBlock->appendInst(unreachable);
1440	}
1441
1442	Type Nucleus::getPointerType(Type ElementType)
1443	{
1444	if(sizeof(void*) == `8`)
1445	{
1446	return T(Ice::IceType_i64);
1447	}
1448	else
1449	{
1450	return T(Ice::IceType_i32);
1451	}
1452	}
1453
1454	Value Nucleus::createNullValue(Type Ty)
1455	{
1456	if(Ice::isVectorType(T(Ty)))
1457	{
1458	ASSERT(Ice::typeNumElements(T(Ty)) <= `16`);
1459	int64_t c[`16`] = {`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`};
1460	return createConstantVector(c, Ty);
1461	}
1462	else
1463	{
1464	return V(::context->getConstantZero(T(Ty)));
1465	}
1466	}
1467
1468	Value *Nucleus::createConstantLong(int64_t i)
1469	{
1470	return V(::context->getConstantInt64(i));
1471	}
1472
1473	Value Nucleus::createConstantInt(int* i)
1474	{
1475	return V(::context->getConstantInt32(i));
1476	}
1477
1478	Value Nucleus::createConstantInt(unsigned* int i)
1479	{
1480	return V(::context->getConstantInt32(i));
1481	}
1482
1483	Value Nucleus::createConstantBool(bool* b)
1484	{
1485	return V(::context->getConstantInt1(b));
1486	}
1487
1488	Value Nucleus::createConstantByte(signed* char i)
1489	{
1490	return V(::context->getConstantInt8(i));
1491	}
1492
1493	Value Nucleus::createConstantByte(unsigned* char i)
1494	{
1495	return V(::context->getConstantInt8(i));
1496	}
1497
1498	Value Nucleus::createConstantShort(short* i)
1499	{
1500	return V(::context->getConstantInt16(i));
1501	}
1502
1503	Value Nucleus::createConstantShort(unsigned* short i)
1504	{
1505	return V(::context->getConstantInt16(i));
1506	}
1507
1508	Value Nucleus::createConstantFloat(float* x)
1509	{
1510	return V(::context->getConstantFloat(x));
1511	}
1512
1513	Value Nucleus::createNullPointer(Type Ty)
1514	{
1515	return createNullValue(T(sizeof(void*) == `8` ? Ice::IceType_i64 : Ice::IceType_i32));
1516	}
1517
1518	Value Nucleus::createConstantVector(const* int64_t constants, Type type)
1519	{
1520	const int vectorSize = `16`;
1521	ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
1522	const int alignment = vectorSize;
1523	auto globalPool = ::function->getGlobalPool();
1524
1525	const int64_t *i = constants;
1526	const double f = reinterpret_cast<const* double*>(constants);
1527	Ice::VariableDeclaration::DataInitializer dataInitializer = nullptr*;
1528
1529	switch((int)reinterpret_cast<intptr_t>(type))
1530	{
1531	case Ice::IceType_v4i32:
1532	case Ice::IceType_v4i1:
1533	{
1534	const int initializer[`4`] = {(int)i[`0`], (int)i[`1`], (int)i[`2`], (int)i[`3`]};
1535	static_assert(sizeof(initializer) == vectorSize, "!");
1536	dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1537	}
1538	break;
1539	case Ice::IceType_v4f32:
1540	{
1541	const float initializer[`4`] = {(float)f[`0`], (float)f[`1`], (float)f[`2`], (float)f[`3`]};
1542	static_assert(sizeof(initializer) == vectorSize, "!");
1543	dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1544	}
1545	break;
1546	case Ice::IceType_v8i16:
1547	case Ice::IceType_v8i1:
1548	{
1549	const short initializer[`8`] = {(short)i[`0`], (short)i[`1`], (short)i[`2`], (short)i[`3`], (short)i[`4`], (short)i[`5`], (short)i[`6`], (short)i[`7`]};
1550	static_assert(sizeof(initializer) == vectorSize, "!");
1551	dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1552	}
1553	break;
1554	case Ice::IceType_v16i8:
1555	case Ice::IceType_v16i1:
1556	{
1557	const char initializer[`16`] = {(char)i[`0`], (char)i[`1`], (char)i[`2`], (char)i[`3`], (char)i[`4`], (char)i[`5`], (char)i[`6`], (char)i[`7`], (char)i[`8`], (char)i[`9`], (char)i[`10`], (char)i[`11`], (char)i[`12`], (char)i[`13`], (char)i[`14`], (char)i[`15`]};
1558	static_assert(sizeof(initializer) == vectorSize, "!");
1559	dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1560	}
1561	break;
1562	case Type_v2i32:
1563	{
1564	const int initializer[`4`] = {(int)i[`0`], (int)i[`1`], (int)i[`0`], (int)i[`1`]};
1565	static_assert(sizeof(initializer) == vectorSize, "!");
1566	dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1567	}
1568	break;
1569	case Type_v2f32:
1570	{
1571	const float initializer[`4`] = {(float)f[`0`], (float)f[`1`], (float)f[`0`], (float)f[`1`]};
1572	static_assert(sizeof(initializer) == vectorSize, "!");
1573	dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1574	}
1575	break;
1576	case Type_v4i16:
1577	{
1578	const short initializer[`8`] = {(short)i[`0`], (short)i[`1`], (short)i[`2`], (short)i[`3`], (short)i[`0`], (short)i[`1`], (short)i[`2`], (short)i[`3`]};
1579	static_assert(sizeof(initializer) == vectorSize, "!");
1580	dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1581	}
1582	break;
1583	case Type_v8i8:
1584	{
1585	const char initializer[`16`] = {(char)i[`0`], (char)i[`1`], (char)i[`2`], (char)i[`3`], (char)i[`4`], (char)i[`5`], (char)i[`6`], (char)i[`7`], (char)i[`0`], (char)i[`1`], (char)i[`2`], (char)i[`3`], (char)i[`4`], (char)i[`5`], (char)i[`6`], (char)i[`7`]};
1586	static_assert(sizeof(initializer) == vectorSize, "!");
1587	dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1588	}
1589	break;
1590	case Type_v4i8:
1591	{
1592	const char initializer[`16`] = {(char)i[`0`], (char)i[`1`], (char)i[`2`], (char)i[`3`], (char)i[`0`], (char)i[`1`], (char)i[`2`], (char)i[`3`], (char)i[`0`], (char)i[`1`], (char)i[`2`], (char)i[`3`], (char)i[`0`], (char)i[`1`], (char)i[`2`], (char)i[`3`]};
1593	static_assert(sizeof(initializer) == vectorSize, "!");
1594	dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1595	}
1596	break;
1597	default:
1598	UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
1599	}
1600
1601	auto name = Ice::GlobalString::createWithoutString(::context);
1602	auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1603	variableDeclaration->setName(name);
1604	variableDeclaration->setAlignment(alignment);
1605	variableDeclaration->setIsConstant(true);
1606	variableDeclaration->addInitializer(dataInitializer);
1607
1608	::function->addGlobal(variableDeclaration);
1609
1610	constexpr int32_t offset = `0`;
1611	Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1612
1613	Ice::Variable *result = ::function->makeVariable(T(type));
1614	auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1615	::basicBlock->appendInst(load);
1616
1617	return V(result);
1618	}
1619
1620	Value Nucleus::createConstantVector(const* double constants, Type type)
1621	{
1622	return createConstantVector((const int64_t*)constants, type);
1623	}
1624
1625	Type *Void::getType()
1626	{
1627	return T(Ice::IceType_void);
1628	}
1629
1630	Type *Bool::getType()
1631	{
1632	return T(Ice::IceType_i1);
1633	}
1634
1635	Type *Byte::getType()
1636	{
1637	return T(Ice::IceType_i8);
1638	}
1639
1640	Type *SByte::getType()
1641	{
1642	return T(Ice::IceType_i8);
1643	}
1644
1645	Type *Short::getType()
1646	{
1647	return T(Ice::IceType_i16);
1648	}
1649
1650	Type *UShort::getType()
1651	{
1652	return T(Ice::IceType_i16);
1653	}
1654
1655	Type *Byte4::getType()
1656	{
1657	return T(Type_v4i8);
1658	}
1659
1660	Type *SByte4::getType()
1661	{
1662	return T(Type_v4i8);
1663	}
1664
1665	namespace
1666	{
1667	RValue<Byte> SaturateUnsigned(RValue<Short> x)
1668	{
1669	return Byte (IfThenElse(Int (x) > `0xFF`, Int (`0xFF`), IfThenElse(Int (x) < `0`, Int (`0`), Int (x))));
1670	}
1671
1672	RValue<Byte> Extract(RValue<Byte8> val, int i)
1673	{
1674	return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
1675	}
1676
1677	RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
1678	{
1679	return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
1680	}
1681	}
1682
1683	RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
1684	{
1685	if(emulateIntrinsics)
1686	{
1687	Byte8 result;
1688	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `0`)) + Int (Extract(y, `0`)))), `0`);
1689	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `1`)) + Int (Extract(y, `1`)))), `1`);
1690	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `2`)) + Int (Extract(y, `2`)))), `2`);
1691	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `3`)) + Int (Extract(y, `3`)))), `3`);
1692	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `4`)) + Int (Extract(y, `4`)))), `4`);
1693	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `5`)) + Int (Extract(y, `5`)))), `5`);
1694	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `6`)) + Int (Extract(y, `6`)))), `6`);
1695	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `7`)) + Int (Extract(y, `7`)))), `7`);
1696
1697	return result;
1698	}
1699	else
1700	{
1701	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1702	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1703	auto target = ::context->getConstantUndef(Ice::IceType_i32);
1704	auto paddusb = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
1705	paddusb->addArg(x.value);
1706	paddusb->addArg(y.value);
1707	::basicBlock->appendInst(paddusb);
1708
1709	return RValue<Byte8>(V(result));
1710	}
1711	}
1712
1713	RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
1714	{
1715	if(emulateIntrinsics)
1716	{
1717	Byte8 result;
1718	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `0`)) - Int (Extract(y, `0`)))), `0`);
1719	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `1`)) - Int (Extract(y, `1`)))), `1`);
1720	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `2`)) - Int (Extract(y, `2`)))), `2`);
1721	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `3`)) - Int (Extract(y, `3`)))), `3`);
1722	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `4`)) - Int (Extract(y, `4`)))), `4`);
1723	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `5`)) - Int (Extract(y, `5`)))), `5`);
1724	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `6`)) - Int (Extract(y, `6`)))), `6`);
1725	result = Insert(result, SaturateUnsigned(Short (Int (Extract(x, `7`)) - Int (Extract(y, `7`)))), `7`);
1726
1727	return result;
1728	}
1729	else
1730	{
1731	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1732	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1733	auto target = ::context->getConstantUndef(Ice::IceType_i32);
1734	auto psubusw = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
1735	psubusw->addArg(x.value);
1736	psubusw->addArg(y.value);
1737	::basicBlock->appendInst(psubusw);
1738
1739	return RValue<Byte8>(V(result));
1740	}
1741	}
1742
1743	RValue<SByte> Extract(RValue<SByte8> val, int i)
1744	{
1745	return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
1746	}
1747
1748	RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
1749	{
1750	return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
1751	}
1752
1753	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1754	{
1755	if(emulateIntrinsics)
1756	{
1757	SByte8 result;
1758	result = Insert(result, Extract(lhs, `0`) >> SByte (rhs), `0`);
1759	result = Insert(result, Extract(lhs, `1`) >> SByte (rhs), `1`);
1760	result = Insert(result, Extract(lhs, `2`) >> SByte (rhs), `2`);
1761	result = Insert(result, Extract(lhs, `3`) >> SByte (rhs), `3`);
1762	result = Insert(result, Extract(lhs, `4`) >> SByte (rhs), `4`);
1763	result = Insert(result, Extract(lhs, `5`) >> SByte (rhs), `5`);
1764	result = Insert(result, Extract(lhs, `6`) >> SByte (rhs), `6`);
1765	result = Insert(result, Extract(lhs, `7`) >> SByte (rhs), `7`);
1766
1767	return result;
1768	}
1769	else
1770	{
1771	#if defined(__i386__) \|\| defined(__x86_64__)
1772	// SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
1773	RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4 (`0xFF00u`);
1774	RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << `8`) >> rhs) >> `8`);
1775
1776	return As<SByte8>(hi \| lo);
1777	#else
1778	return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
1779	#endif
1780	}
1781	}
1782
1783	RValue<Int> SignMask(RValue<Byte8> x)
1784	{
1785	if(emulateIntrinsics \|\| CPUID::ARM)
1786	{
1787	Byte8 xx = As<Byte8>(As<SByte8>(x) >> `7`) & Byte8 (`0x01`, `0x02`, `0x04`, `0x08`, `0x10`, `0x20`, `0x40`, `0x80`);
1788	return Int (Extract(xx, `0`)) \| Int (Extract(xx, `1`)) \| Int (Extract(xx, `2`)) \| Int (Extract(xx, `3`)) \| Int (Extract(xx, `4`)) \| Int (Extract(xx, `5`)) \| Int (Extract(xx, `6`)) \| Int (Extract(xx, `7`));
1789	}
1790	else
1791	{
1792	Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
1793	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1794	auto target = ::context->getConstantUndef(Ice::IceType_i32);
1795	auto movmsk = Ice::InstIntrinsicCall::create(::function, `1`, result, target, intrinsic);
1796	movmsk->addArg(x.value);
1797	::basicBlock->appendInst(movmsk);
1798
1799	return RValue<Int>(V(result)) & `0xFF`;
1800	}
1801	}
1802
1803	// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
1804	// {
1805	// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
1806	// }
1807
1808	RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
1809	{
1810	return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
1811	}
1812
1813	Type *Byte8::getType()
1814	{
1815	return T(Type_v8i8);
1816	}
1817
1818	// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
1819	// {
1820	// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
1821	// }
1822
1823	// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1824	// {
1825	// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
1826	// }
1827
1828	RValue<SByte> SaturateSigned(RValue<Short> x)
1829	{
1830	return SByte (IfThenElse(Int (x) > `0x7F`, Int (`0x7F`), IfThenElse(Int (x) < -`0x80`, Int (`0x80`), Int (x))));
1831	}
1832
1833	RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
1834	{
1835	if(emulateIntrinsics)
1836	{
1837	SByte8 result;
1838	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `0`)) + Int (Extract(y, `0`)))), `0`);
1839	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `1`)) + Int (Extract(y, `1`)))), `1`);
1840	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `2`)) + Int (Extract(y, `2`)))), `2`);
1841	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `3`)) + Int (Extract(y, `3`)))), `3`);
1842	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `4`)) + Int (Extract(y, `4`)))), `4`);
1843	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `5`)) + Int (Extract(y, `5`)))), `5`);
1844	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `6`)) + Int (Extract(y, `6`)))), `6`);
1845	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `7`)) + Int (Extract(y, `7`)))), `7`);
1846
1847	return result;
1848	}
1849	else
1850	{
1851	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1852	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1853	auto target = ::context->getConstantUndef(Ice::IceType_i32);
1854	auto paddsb = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
1855	paddsb->addArg(x.value);
1856	paddsb->addArg(y.value);
1857	::basicBlock->appendInst(paddsb);
1858
1859	return RValue<SByte8>(V(result));
1860	}
1861	}
1862
1863	RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
1864	{
1865	if(emulateIntrinsics)
1866	{
1867	SByte8 result;
1868	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `0`)) - Int (Extract(y, `0`)))), `0`);
1869	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `1`)) - Int (Extract(y, `1`)))), `1`);
1870	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `2`)) - Int (Extract(y, `2`)))), `2`);
1871	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `3`)) - Int (Extract(y, `3`)))), `3`);
1872	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `4`)) - Int (Extract(y, `4`)))), `4`);
1873	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `5`)) - Int (Extract(y, `5`)))), `5`);
1874	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `6`)) - Int (Extract(y, `6`)))), `6`);
1875	result = Insert(result, SaturateSigned(Short (Int (Extract(x, `7`)) - Int (Extract(y, `7`)))), `7`);
1876
1877	return result;
1878	}
1879	else
1880	{
1881	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1882	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1883	auto target = ::context->getConstantUndef(Ice::IceType_i32);
1884	auto psubsb = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
1885	psubsb->addArg(x.value);
1886	psubsb->addArg(y.value);
1887	::basicBlock->appendInst(psubsb);
1888
1889	return RValue<SByte8>(V(result));
1890	}
1891	}
1892
1893	RValue<Int> SignMask(RValue<SByte8> x)
1894	{
1895	if(emulateIntrinsics \|\| CPUID::ARM)
1896	{
1897	SByte8 xx = (x >> `7`) & SByte8 (`0x01`, `0x02`, `0x04`, `0x08`, `0x10`, `0x20`, `0x40`, `0x80`);
1898	return Int (Extract(xx, `0`)) \| Int (Extract(xx, `1`)) \| Int (Extract(xx, `2`)) \| Int (Extract(xx, `3`)) \| Int (Extract(xx, `4`)) \| Int (Extract(xx, `5`)) \| Int (Extract(xx, `6`)) \| Int (Extract(xx, `7`));
1899	}
1900	else
1901	{
1902	Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
1903	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1904	auto target = ::context->getConstantUndef(Ice::IceType_i32);
1905	auto movmsk = Ice::InstIntrinsicCall::create(::function, `1`, result, target, intrinsic);
1906	movmsk->addArg(x.value);
1907	::basicBlock->appendInst(movmsk);
1908
1909	return RValue<Int>(V(result)) & `0xFF`;
1910	}
1911	}
1912
1913	RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
1914	{
1915	return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
1916	}
1917
1918	RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
1919	{
1920	return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
1921	}
1922
1923	Type *SByte8::getType()
1924	{
1925	return T(Type_v8i8);
1926	}
1927
1928	Type *Byte16::getType()
1929	{
1930	return T(Ice::IceType_v16i8);
1931	}
1932
1933	Type *SByte16::getType()
1934	{
1935	return T(Ice::IceType_v16i8);
1936	}
1937
1938	Type *Short2::getType()
1939	{
1940	return T(Type_v2i16);
1941	}
1942
1943	Type *UShort2::getType()
1944	{
1945	return T(Type_v2i16);
1946	}
1947
1948	Short4::Short4(RValue<Int4> cast)
1949	{
1950	int select[`8`] = {`0`, `2`, `4`, `6`, `0`, `2`, `4`, `6`};
1951	Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
1952	Value *packed = Nucleus::createShuffleVector(short8, short8, select);
1953
1954	Value *int2 = RValue<Int2>(Int2 (As<Int4>(packed))).value;
1955	Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
1956
1957	storeValue(short4);
1958	}
1959
1960	// Short4::Short4(RValue<Float> cast)
1961	// {
1962	// }
1963
1964	Short4::Short4(RValue<Float4> cast)
1965	{
1966	UNIMPLEMENTED("Short4::Short4(RValue<Float4> cast)");
1967	}
1968
1969	RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
1970	{
1971	if(emulateIntrinsics)
1972	{
1973	Short4 result;
1974	result = Insert(result, Extract(lhs, `0`) << Short (rhs), `0`);
1975	result = Insert(result, Extract(lhs, `1`) << Short (rhs), `1`);
1976	result = Insert(result, Extract(lhs, `2`) << Short (rhs), `2`);
1977	result = Insert(result, Extract(lhs, `3`) << Short (rhs), `3`);
1978
1979	return result;
1980	}
1981	else
1982	{
1983	return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
1984	}
1985	}
1986
1987	RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
1988	{
1989	if(emulateIntrinsics)
1990	{
1991	Short4 result;
1992	result = Insert(result, Extract(lhs, `0`) >> Short (rhs), `0`);
1993	result = Insert(result, Extract(lhs, `1`) >> Short (rhs), `1`);
1994	result = Insert(result, Extract(lhs, `2`) >> Short (rhs), `2`);
1995	result = Insert(result, Extract(lhs, `3`) >> Short (rhs), `3`);
1996
1997	return result;
1998	}
1999	else
2000	{
2001	return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2002	}
2003	}
2004
2005	RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2006	{
2007	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2008	auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2009	::basicBlock->appendInst(cmp);
2010
2011	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2012	auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2013	::basicBlock->appendInst(select);
2014
2015	return RValue<Short4>(V(result));
2016	}
2017
2018	RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2019	{
2020	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2021	auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2022	::basicBlock->appendInst(cmp);
2023
2024	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2025	auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2026	::basicBlock->appendInst(select);
2027
2028	return RValue<Short4>(V(result));
2029	}
2030
2031	RValue<Short> SaturateSigned(RValue<Int> x)
2032	{
2033	return Short (IfThenElse(x > `0x7FFF`, Int (`0x7FFF`), IfThenElse(x < -`0x8000`, Int (`0x8000`), x)));
2034	}
2035
2036	RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2037	{
2038	if(emulateIntrinsics)
2039	{
2040	Short4 result;
2041	result = Insert(result, SaturateSigned(Int (Extract(x, `0`)) + Int (Extract(y, `0`))), `0`);
2042	result = Insert(result, SaturateSigned(Int (Extract(x, `1`)) + Int (Extract(y, `1`))), `1`);
2043	result = Insert(result, SaturateSigned(Int (Extract(x, `2`)) + Int (Extract(y, `2`))), `2`);
2044	result = Insert(result, SaturateSigned(Int (Extract(x, `3`)) + Int (Extract(y, `3`))), `3`);
2045
2046	return result;
2047	}
2048	else
2049	{
2050	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2051	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2052	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2053	auto paddsw = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2054	paddsw->addArg(x.value);
2055	paddsw->addArg(y.value);
2056	::basicBlock->appendInst(paddsw);
2057
2058	return RValue<Short4>(V(result));
2059	}
2060	}
2061
2062	RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2063	{
2064	if(emulateIntrinsics)
2065	{
2066	Short4 result;
2067	result = Insert(result, SaturateSigned(Int (Extract(x, `0`)) - Int (Extract(y, `0`))), `0`);
2068	result = Insert(result, SaturateSigned(Int (Extract(x, `1`)) - Int (Extract(y, `1`))), `1`);
2069	result = Insert(result, SaturateSigned(Int (Extract(x, `2`)) - Int (Extract(y, `2`))), `2`);
2070	result = Insert(result, SaturateSigned(Int (Extract(x, `3`)) - Int (Extract(y, `3`))), `3`);
2071
2072	return result;
2073	}
2074	else
2075	{
2076	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2077	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2078	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2079	auto psubsw = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2080	psubsw->addArg(x.value);
2081	psubsw->addArg(y.value);
2082	::basicBlock->appendInst(psubsw);
2083
2084	return RValue<Short4>(V(result));
2085	}
2086	}
2087
2088	RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2089	{
2090	if(emulateIntrinsics)
2091	{
2092	Short4 result;
2093	result = Insert(result, Short ((Int (Extract(x, `0`)) * Int (Extract(y, `0`))) >> `16`), `0`);
2094	result = Insert(result, Short ((Int (Extract(x, `1`)) * Int (Extract(y, `1`))) >> `16`), `1`);
2095	result = Insert(result, Short ((Int (Extract(x, `2`)) * Int (Extract(y, `2`))) >> `16`), `2`);
2096	result = Insert(result, Short ((Int (Extract(x, `3`)) * Int (Extract(y, `3`))) >> `16`), `3`);
2097
2098	return result;
2099	}
2100	else
2101	{
2102	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2103	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2104	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2105	auto pmulhw = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2106	pmulhw->addArg(x.value);
2107	pmulhw->addArg(y.value);
2108	::basicBlock->appendInst(pmulhw);
2109
2110	return RValue<Short4>(V(result));
2111	}
2112	}
2113
2114	RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2115	{
2116	if(emulateIntrinsics)
2117	{
2118	Int2 result;
2119	result = Insert(result, Int (Extract(x, `0`)) * Int (Extract(y, `0`)) + Int (Extract(x, `1`)) * Int (Extract(y, `1`)), `0`);
2120	result = Insert(result, Int (Extract(x, `2`)) * Int (Extract(y, `2`)) + Int (Extract(x, `3`)) * Int (Extract(y, `3`)), `1`);
2121
2122	return result;
2123	}
2124	else
2125	{
2126	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2127	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2128	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2129	auto pmaddwd = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2130	pmaddwd->addArg(x.value);
2131	pmaddwd->addArg(y.value);
2132	::basicBlock->appendInst(pmaddwd);
2133
2134	return As<Int2>(V(result));
2135	}
2136	}
2137
2138	RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2139	{
2140	if(emulateIntrinsics)
2141	{
2142	SByte8 result;
2143	result = Insert(result, SaturateSigned(Extract(x, `0`)), `0`);
2144	result = Insert(result, SaturateSigned(Extract(x, `1`)), `1`);
2145	result = Insert(result, SaturateSigned(Extract(x, `2`)), `2`);
2146	result = Insert(result, SaturateSigned(Extract(x, `3`)), `3`);
2147	result = Insert(result, SaturateSigned(Extract(y, `0`)), `4`);
2148	result = Insert(result, SaturateSigned(Extract(y, `1`)), `5`);
2149	result = Insert(result, SaturateSigned(Extract(y, `2`)), `6`);
2150	result = Insert(result, SaturateSigned(Extract(y, `3`)), `7`);
2151
2152	return result;
2153	}
2154	else
2155	{
2156	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2157	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2158	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2159	auto pack = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2160	pack->addArg(x.value);
2161	pack->addArg(y.value);
2162	::basicBlock->appendInst(pack);
2163
2164	return As<SByte8>(Swizzle(As<Int4>(V(result)), `0x88`));
2165	}
2166	}
2167
2168	RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2169	{
2170	if(emulateIntrinsics)
2171	{
2172	Byte8 result;
2173	result = Insert(result, SaturateUnsigned(Extract(x, `0`)), `0`);
2174	result = Insert(result, SaturateUnsigned(Extract(x, `1`)), `1`);
2175	result = Insert(result, SaturateUnsigned(Extract(x, `2`)), `2`);
2176	result = Insert(result, SaturateUnsigned(Extract(x, `3`)), `3`);
2177	result = Insert(result, SaturateUnsigned(Extract(y, `0`)), `4`);
2178	result = Insert(result, SaturateUnsigned(Extract(y, `1`)), `5`);
2179	result = Insert(result, SaturateUnsigned(Extract(y, `2`)), `6`);
2180	result = Insert(result, SaturateUnsigned(Extract(y, `3`)), `7`);
2181
2182	return result;
2183	}
2184	else
2185	{
2186	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2187	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2188	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2189	auto pack = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2190	pack->addArg(x.value);
2191	pack->addArg(y.value);
2192	::basicBlock->appendInst(pack);
2193
2194	return As<Byte8>(Swizzle(As<Int4>(V(result)), `0x88`));
2195	}
2196	}
2197
2198	RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2199	{
2200	return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2201	}
2202
2203	RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2204	{
2205	return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2206	}
2207
2208	Type *Short4::getType()
2209	{
2210	return T(Type_v4i16);
2211	}
2212
2213	UShort4::UShort4(RValue<Float4> cast, bool saturate)
2214	{
2215	if(saturate)
2216	{
2217	if(CPUID::SSE4_1)
2218	{
2219	// x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2220	// PackUnsigned takes care of 0x0000 saturation.
2221	Int4 int4(Min(cast, Float4 (`0xFFFF`)));
2222	*this = As<UShort4>(PackUnsigned(int4, int4));
2223	}
2224	else if(CPUID::ARM)
2225	{
2226	// ARM saturates the 32-bit integer result on overflow/undeflow.
2227	Int4 int4(cast);
2228	*this = As<UShort4>(PackUnsigned(int4, int4));
2229	}
2230	else
2231	{
2232	*this = Short4 (Int4 (Max(Min(cast, Float4 (`0xFFFF`)), Float4 (`0x0000`))));
2233	}
2234	}
2235	else
2236	{
2237	*this = Short4 (Int4 (cast));
2238	}
2239	}
2240
2241	RValue<UShort> Extract(RValue<UShort4> val, int i)
2242	{
2243	return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2244	}
2245
2246	RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2247	{
2248	return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2249	}
2250
2251	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2252	{
2253	if(emulateIntrinsics)
2254	{
2255	UShort4 result;
2256	result = Insert(result, Extract(lhs, `0`) << UShort (rhs), `0`);
2257	result = Insert(result, Extract(lhs, `1`) << UShort (rhs), `1`);
2258	result = Insert(result, Extract(lhs, `2`) << UShort (rhs), `2`);
2259	result = Insert(result, Extract(lhs, `3`) << UShort (rhs), `3`);
2260
2261	return result;
2262	}
2263	else
2264	{
2265	return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2266	}
2267	}
2268
2269	RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2270	{
2271	if(emulateIntrinsics)
2272	{
2273	UShort4 result;
2274	result = Insert(result, Extract(lhs, `0`) >> UShort (rhs), `0`);
2275	result = Insert(result, Extract(lhs, `1`) >> UShort (rhs), `1`);
2276	result = Insert(result, Extract(lhs, `2`) >> UShort (rhs), `2`);
2277	result = Insert(result, Extract(lhs, `3`) >> UShort (rhs), `3`);
2278
2279	return result;
2280	}
2281	else
2282	{
2283	return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2284	}
2285	}
2286
2287	RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2288	{
2289	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2290	auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2291	::basicBlock->appendInst(cmp);
2292
2293	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2294	auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2295	::basicBlock->appendInst(select);
2296
2297	return RValue<UShort4>(V(result));
2298	}
2299
2300	RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2301	{
2302	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2303	auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2304	::basicBlock->appendInst(cmp);
2305
2306	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2307	auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2308	::basicBlock->appendInst(select);
2309
2310	return RValue<UShort4>(V(result));
2311	}
2312
2313	RValue<UShort> SaturateUnsigned(RValue<Int> x)
2314	{
2315	return UShort (IfThenElse(x > `0xFFFF`, Int (`0xFFFF`), IfThenElse(x < `0`, Int (`0`), x)));
2316	}
2317
2318	RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2319	{
2320	if(emulateIntrinsics)
2321	{
2322	UShort4 result;
2323	result = Insert(result, SaturateUnsigned(Int (Extract(x, `0`)) + Int (Extract(y, `0`))), `0`);
2324	result = Insert(result, SaturateUnsigned(Int (Extract(x, `1`)) + Int (Extract(y, `1`))), `1`);
2325	result = Insert(result, SaturateUnsigned(Int (Extract(x, `2`)) + Int (Extract(y, `2`))), `2`);
2326	result = Insert(result, SaturateUnsigned(Int (Extract(x, `3`)) + Int (Extract(y, `3`))), `3`);
2327
2328	return result;
2329	}
2330	else
2331	{
2332	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2333	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2334	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2335	auto paddusw = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2336	paddusw->addArg(x.value);
2337	paddusw->addArg(y.value);
2338	::basicBlock->appendInst(paddusw);
2339
2340	return RValue<UShort4>(V(result));
2341	}
2342	}
2343
2344	RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2345	{
2346	if(emulateIntrinsics)
2347	{
2348	UShort4 result;
2349	result = Insert(result, SaturateUnsigned(Int (Extract(x, `0`)) - Int (Extract(y, `0`))), `0`);
2350	result = Insert(result, SaturateUnsigned(Int (Extract(x, `1`)) - Int (Extract(y, `1`))), `1`);
2351	result = Insert(result, SaturateUnsigned(Int (Extract(x, `2`)) - Int (Extract(y, `2`))), `2`);
2352	result = Insert(result, SaturateUnsigned(Int (Extract(x, `3`)) - Int (Extract(y, `3`))), `3`);
2353
2354	return result;
2355	}
2356	else
2357	{
2358	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2359	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2360	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2361	auto psubusw = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2362	psubusw->addArg(x.value);
2363	psubusw->addArg(y.value);
2364	::basicBlock->appendInst(psubusw);
2365
2366	return RValue<UShort4>(V(result));
2367	}
2368	}
2369
2370	RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2371	{
2372	if(emulateIntrinsics)
2373	{
2374	UShort4 result;
2375	result = Insert(result, UShort ((UInt (Extract(x, `0`)) * UInt (Extract(y, `0`))) >> `16`), `0`);
2376	result = Insert(result, UShort ((UInt (Extract(x, `1`)) * UInt (Extract(y, `1`))) >> `16`), `1`);
2377	result = Insert(result, UShort ((UInt (Extract(x, `2`)) * UInt (Extract(y, `2`))) >> `16`), `2`);
2378	result = Insert(result, UShort ((UInt (Extract(x, `3`)) * UInt (Extract(y, `3`))) >> `16`), `3`);
2379
2380	return result;
2381	}
2382	else
2383	{
2384	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2385	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2386	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2387	auto pmulhuw = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2388	pmulhuw->addArg(x.value);
2389	pmulhuw->addArg(y.value);
2390	::basicBlock->appendInst(pmulhuw);
2391
2392	return RValue<UShort4>(V(result));
2393	}
2394	}
2395
2396	RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2397	{
2398	// TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2399
2400	// Scalarized implementation.
2401	Int4 result;
2402	result = Insert(result, Int ((Long (Extract(x, `0`)) * Long (Extract(y, `0`))) >> Long (Int (`32`))), `0`);
2403	result = Insert(result, Int ((Long (Extract(x, `1`)) * Long (Extract(y, `1`))) >> Long (Int (`32`))), `1`);
2404	result = Insert(result, Int ((Long (Extract(x, `2`)) * Long (Extract(y, `2`))) >> Long (Int (`32`))), `2`);
2405	result = Insert(result, Int ((Long (Extract(x, `3`)) * Long (Extract(y, `3`))) >> Long (Int (`32`))), `3`);
2406
2407	return result;
2408	}
2409
2410	RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2411	{
2412	// TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2413
2414	if(false) // Partial product based implementation.
2415	{
2416	auto xh = x >> `16`;
2417	auto yh = y >> `16`;
2418	auto xl = x & UInt4 (`0x0000FFFF`);
2419	auto yl = y & UInt4 (`0x0000FFFF`);
2420	auto xlyh = xl * yh;
2421	auto xhyl = xh * yl;
2422	auto xlyhh = xlyh >> `16`;
2423	auto xhylh = xhyl >> `16`;
2424	auto xlyhl = xlyh & UInt4 (`0x0000FFFF`);
2425	auto xhyll = xhyl & UInt4 (`0x0000FFFF`);
2426	auto xlylh = (xl * yl) >> `16`;
2427	auto oflow = (xlyhl + xhyll + xlylh) >> `16`;
2428
2429	return (xh * yh) + (xlyhh + xhylh) + oflow;
2430	}
2431
2432	// Scalarized implementation.
2433	Int4 result;
2434	result = Insert(result, Int ((Long (UInt (Extract(As<Int4>(x), `0`))) * Long (UInt (Extract(As<Int4>(y), `0`)))) >> Long (Int (`32`))), `0`);
2435	result = Insert(result, Int ((Long (UInt (Extract(As<Int4>(x), `1`))) * Long (UInt (Extract(As<Int4>(y), `1`)))) >> Long (Int (`32`))), `1`);
2436	result = Insert(result, Int ((Long (UInt (Extract(As<Int4>(x), `2`))) * Long (UInt (Extract(As<Int4>(y), `2`)))) >> Long (Int (`32`))), `2`);
2437	result = Insert(result, Int ((Long (UInt (Extract(As<Int4>(x), `3`))) * Long (UInt (Extract(As<Int4>(y), `3`)))) >> Long (Int (`32`))), `3`);
2438
2439	return As<UInt4>(result);
2440	}
2441
2442	RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2443	{
2444	UNIMPLEMENTED("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
2445	return UShort4 (`0`);
2446	}
2447
2448	Type *UShort4::getType()
2449	{
2450	return T(Type_v4i16);
2451	}
2452
2453	RValue<Short> Extract(RValue<Short8> val, int i)
2454	{
2455	return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2456	}
2457
2458	RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2459	{
2460	return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2461	}
2462
2463	RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2464	{
2465	if(emulateIntrinsics)
2466	{
2467	Short8 result;
2468	result = Insert(result, Extract(lhs, `0`) << Short (rhs), `0`);
2469	result = Insert(result, Extract(lhs, `1`) << Short (rhs), `1`);
2470	result = Insert(result, Extract(lhs, `2`) << Short (rhs), `2`);
2471	result = Insert(result, Extract(lhs, `3`) << Short (rhs), `3`);
2472	result = Insert(result, Extract(lhs, `4`) << Short (rhs), `4`);
2473	result = Insert(result, Extract(lhs, `5`) << Short (rhs), `5`);
2474	result = Insert(result, Extract(lhs, `6`) << Short (rhs), `6`);
2475	result = Insert(result, Extract(lhs, `7`) << Short (rhs), `7`);
2476
2477	return result;
2478	}
2479	else
2480	{
2481	return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2482	}
2483	}
2484
2485	RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
2486	{
2487	if(emulateIntrinsics)
2488	{
2489	Short8 result;
2490	result = Insert(result, Extract(lhs, `0`) >> Short (rhs), `0`);
2491	result = Insert(result, Extract(lhs, `1`) >> Short (rhs), `1`);
2492	result = Insert(result, Extract(lhs, `2`) >> Short (rhs), `2`);
2493	result = Insert(result, Extract(lhs, `3`) >> Short (rhs), `3`);
2494	result = Insert(result, Extract(lhs, `4`) >> Short (rhs), `4`);
2495	result = Insert(result, Extract(lhs, `5`) >> Short (rhs), `5`);
2496	result = Insert(result, Extract(lhs, `6`) >> Short (rhs), `6`);
2497	result = Insert(result, Extract(lhs, `7`) >> Short (rhs), `7`);
2498
2499	return result;
2500	}
2501	else
2502	{
2503	return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2504	}
2505	}
2506
2507	RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
2508	{
2509	UNIMPLEMENTED("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
2510	return Int4 (`0`);
2511	}
2512
2513	RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
2514	{
2515	UNIMPLEMENTED("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
2516	return Short8 (`0`);
2517	}
2518
2519	Type *Short8::getType()
2520	{
2521	return T(Ice::IceType_v8i16);
2522	}
2523
2524	RValue<UShort> Extract(RValue<UShort8> val, int i)
2525	{
2526	return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2527	}
2528
2529	RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
2530	{
2531	return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
2532	}
2533
2534	RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
2535	{
2536	if(emulateIntrinsics)
2537	{
2538	UShort8 result;
2539	result = Insert(result, Extract(lhs, `0`) << UShort (rhs), `0`);
2540	result = Insert(result, Extract(lhs, `1`) << UShort (rhs), `1`);
2541	result = Insert(result, Extract(lhs, `2`) << UShort (rhs), `2`);
2542	result = Insert(result, Extract(lhs, `3`) << UShort (rhs), `3`);
2543	result = Insert(result, Extract(lhs, `4`) << UShort (rhs), `4`);
2544	result = Insert(result, Extract(lhs, `5`) << UShort (rhs), `5`);
2545	result = Insert(result, Extract(lhs, `6`) << UShort (rhs), `6`);
2546	result = Insert(result, Extract(lhs, `7`) << UShort (rhs), `7`);
2547
2548	return result;
2549	}
2550	else
2551	{
2552	return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2553	}
2554	}
2555
2556	RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
2557	{
2558	if(emulateIntrinsics)
2559	{
2560	UShort8 result;
2561	result = Insert(result, Extract(lhs, `0`) >> UShort (rhs), `0`);
2562	result = Insert(result, Extract(lhs, `1`) >> UShort (rhs), `1`);
2563	result = Insert(result, Extract(lhs, `2`) >> UShort (rhs), `2`);
2564	result = Insert(result, Extract(lhs, `3`) >> UShort (rhs), `3`);
2565	result = Insert(result, Extract(lhs, `4`) >> UShort (rhs), `4`);
2566	result = Insert(result, Extract(lhs, `5`) >> UShort (rhs), `5`);
2567	result = Insert(result, Extract(lhs, `6`) >> UShort (rhs), `6`);
2568	result = Insert(result, Extract(lhs, `7`) >> UShort (rhs), `7`);
2569
2570	return result;
2571	}
2572	else
2573	{
2574	return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2575	}
2576	}
2577
2578	RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
2579	{
2580	UNIMPLEMENTED("RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)");
2581	return UShort8 (`0`);
2582	}
2583
2584	RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
2585	{
2586	UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
2587	return UShort8 (`0`);
2588	}
2589
2590	// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
2591	// RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
2592	// {
2593	// ASSERT(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
2594	// }
2595
2596	Type *UShort8::getType()
2597	{
2598	return T(Ice::IceType_v8i16);
2599	}
2600
2601	RValue<Int> operator++(Int &val, int) // Post-increment
2602	{
2603	RValue<Int> res = val;
2604	val += `1`;
2605	return res;
2606	}
2607
2608	const Int &operator++(Int &val) // Pre-increment
2609	{
2610	val += `1`;
2611	return val;
2612	}
2613
2614	RValue<Int> operator--(Int &val, int) // Post-decrement
2615	{
2616	RValue<Int> res = val;
2617	val -= `1`;
2618	return res;
2619	}
2620
2621	const Int &operator--(Int &val) // Pre-decrement
2622	{
2623	val -= `1`;
2624	return val;
2625	}
2626
2627	RValue<Int> RoundInt(RValue<Float> cast)
2628	{
2629	if(emulateIntrinsics \|\| CPUID::ARM)
2630	{
2631	// Push the fractional part off the mantissa. Accurate up to +/-2^22.
2632	return Int ((cast + Float (`0x00C00000`)) - Float (`0x00C00000`));
2633	}
2634	else
2635	{
2636	Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2637	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2638	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2639	auto nearbyint = Ice::InstIntrinsicCall::create(::function, `1`, result, target, intrinsic);
2640	nearbyint->addArg(cast.value);
2641	::basicBlock->appendInst(nearbyint);
2642
2643	return RValue<Int>(V(result));
2644	}
2645	}
2646
2647	Type *Int::getType()
2648	{
2649	return T(Ice::IceType_i32);
2650	}
2651
2652	Type *Long::getType()
2653	{
2654	return T(Ice::IceType_i64);
2655	}
2656
2657	UInt::UInt(RValue<Float> cast)
2658	{
2659	// Smallest positive value representable in UInt, but not in Int
2660	const unsigned int ustart = `0x80000000u`;
2661	const float ustartf = float(ustart);
2662
2663	// If the value is negative, store 0, otherwise store the result of the conversion
2664	storeValue((~(As<Int>(cast) >> `31`) &
2665	// Check if the value can be represented as an Int
2666	IfThenElse(cast >= ustartf,
2667	// If the value is too large, subtract ustart and re-add it after conversion.
2668	As<Int>(As<UInt>(Int (cast - Float (ustartf))) + UInt (ustart)),
2669	// Otherwise, just convert normally
2670	Int (cast))).value);
2671	}
2672
2673	RValue<UInt> operator++(UInt &val, int) // Post-increment
2674	{
2675	RValue<UInt> res = val;
2676	val += `1`;
2677	return res;
2678	}
2679
2680	const UInt &operator++(UInt &val) // Pre-increment
2681	{
2682	val += `1`;
2683	return val;
2684	}
2685
2686	RValue<UInt> operator--(UInt &val, int) // Post-decrement
2687	{
2688	RValue<UInt> res = val;
2689	val -= `1`;
2690	return res;
2691	}
2692
2693	const UInt &operator--(UInt &val) // Pre-decrement
2694	{
2695	val -= `1`;
2696	return val;
2697	}
2698
2699	// RValue<UInt> RoundUInt(RValue<Float> cast)
2700	// {
2701	// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
2702	// }
2703
2704	Type *UInt::getType()
2705	{
2706	return T(Ice::IceType_i32);
2707	}
2708
2709	// Int2::Int2(RValue<Int> cast)
2710	// {
2711	// Value extend = Nucleus::createZExt(cast.value, Long::getType());*
2712	// Value vector = Nucleus::createBitCast(extend, Int2::getType());*
2713	//
2714	// Constant shuffle[2];*
2715	// shuffle[0] = Nucleus::createConstantInt(0);
2716	// shuffle[1] = Nucleus::createConstantInt(0);
2717	//
2718	// Value replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));*
2719	//
2720	// storeValue(replicate);
2721	// }
2722
2723	RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
2724	{
2725	if(emulateIntrinsics)
2726	{
2727	Int2 result;
2728	result = Insert(result, Extract(lhs, `0`) << Int (rhs), `0`);
2729	result = Insert(result, Extract(lhs, `1`) << Int (rhs), `1`);
2730
2731	return result;
2732	}
2733	else
2734	{
2735	return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2736	}
2737	}
2738
2739	RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
2740	{
2741	if(emulateIntrinsics)
2742	{
2743	Int2 result;
2744	result = Insert(result, Extract(lhs, `0`) >> Int (rhs), `0`);
2745	result = Insert(result, Extract(lhs, `1`) >> Int (rhs), `1`);
2746
2747	return result;
2748	}
2749	else
2750	{
2751	return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2752	}
2753	}
2754
2755	Type *Int2::getType()
2756	{
2757	return T(Type_v2i32);
2758	}
2759
2760	RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
2761	{
2762	if(emulateIntrinsics)
2763	{
2764	UInt2 result;
2765	result = Insert(result, Extract(lhs, `0`) << UInt (rhs), `0`);
2766	result = Insert(result, Extract(lhs, `1`) << UInt (rhs), `1`);
2767
2768	return result;
2769	}
2770	else
2771	{
2772	return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2773	}
2774	}
2775
2776	RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
2777	{
2778	if(emulateIntrinsics)
2779	{
2780	UInt2 result;
2781	result = Insert(result, Extract(lhs, `0`) >> UInt (rhs), `0`);
2782	result = Insert(result, Extract(lhs, `1`) >> UInt (rhs), `1`);
2783
2784	return result;
2785	}
2786	else
2787	{
2788	return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2789	}
2790	}
2791
2792	Type *UInt2::getType()
2793	{
2794	return T(Type_v2i32);
2795	}
2796
2797	Int4::Int4(RValue<Byte4> cast) : XYZW (this)
2798	{
2799	Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2800	Value *a = Nucleus::createInsertElement(loadValue(), x, `0`);
2801
2802	Value *e;
2803	int swizzle[`16`] = {`0`, `16`, `1`, `17`, `2`, `18`, `3`, `19`, `4`, `20`, `5`, `21`, `6`, `22`, `7`, `23`};
2804	Value *b = Nucleus::createBitCast(a, Byte16::getType());
2805	Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
2806
2807	int swizzle2[`8`] = {`0`, `8`, `1`, `9`, `2`, `10`, `3`, `11`};
2808	Value *d = Nucleus::createBitCast(c, Short8::getType());
2809	e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
2810
2811	Value *f = Nucleus::createBitCast(e, Int4::getType());
2812	storeValue(f);
2813	}
2814
2815	Int4::Int4(RValue<SByte4> cast) : XYZW (this)
2816	{
2817	Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2818	Value *a = Nucleus::createInsertElement(loadValue(), x, `0`);
2819
2820	int swizzle[`16`] = {`0`, `0`, `1`, `1`, `2`, `2`, `3`, `3`, `4`, `4`, `5`, `5`, `6`, `6`, `7`, `7`};
2821	Value *b = Nucleus::createBitCast(a, Byte16::getType());
2822	Value *c = Nucleus::createShuffleVector(b, b, swizzle);
2823
2824	int swizzle2[`8`] = {`0`, `0`, `1`, `1`, `2`, `2`, `3`, `3`};
2825	Value *d = Nucleus::createBitCast(c, Short8::getType());
2826	Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
2827
2828	*this = As<Int4>(e) >> `24`;
2829	}
2830
2831	Int4::Int4(RValue<Short4> cast) : XYZW (this)
2832	{
2833	int swizzle[`8`] = {`0`, `0`, `1`, `1`, `2`, `2`, `3`, `3`};
2834	Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2835
2836	*this = As<Int4>(c) >> `16`;
2837	}
2838
2839	Int4::Int4(RValue<UShort4> cast) : XYZW (this)
2840	{
2841	int swizzle[`8`] = {`0`, `8`, `1`, `9`, `2`, `10`, `3`, `11`};
2842	Value *c = Nucleus::createShuffleVector(cast.value, Short8 (`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`).loadValue(), swizzle);
2843	Value *d = Nucleus::createBitCast(c, Int4::getType());
2844	storeValue(d);
2845	}
2846
2847	Int4::Int4(RValue<Int> rhs) : XYZW (this)
2848	{
2849	Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
2850
2851	int swizzle[`4`] = {`0`, `0`, `0`, `0`};
2852	Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
2853
2854	storeValue(replicate);
2855	}
2856
2857	RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
2858	{
2859	if(emulateIntrinsics)
2860	{
2861	Int4 result;
2862	result = Insert(result, Extract(lhs, `0`) << Int (rhs), `0`);
2863	result = Insert(result, Extract(lhs, `1`) << Int (rhs), `1`);
2864	result = Insert(result, Extract(lhs, `2`) << Int (rhs), `2`);
2865	result = Insert(result, Extract(lhs, `3`) << Int (rhs), `3`);
2866
2867	return result;
2868	}
2869	else
2870	{
2871	return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2872	}
2873	}
2874
2875	RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
2876	{
2877	if(emulateIntrinsics)
2878	{
2879	Int4 result;
2880	result = Insert(result, Extract(lhs, `0`) >> Int (rhs), `0`);
2881	result = Insert(result, Extract(lhs, `1`) >> Int (rhs), `1`);
2882	result = Insert(result, Extract(lhs, `2`) >> Int (rhs), `2`);
2883	result = Insert(result, Extract(lhs, `3`) >> Int (rhs), `3`);
2884
2885	return result;
2886	}
2887	else
2888	{
2889	return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2890	}
2891	}
2892
2893	RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2894	{
2895	return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
2896	}
2897
2898	RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2899	{
2900	return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
2901	}
2902
2903	RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2904	{
2905	return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
2906	}
2907
2908	RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2909	{
2910	return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
2911	}
2912
2913	RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2914	{
2915	return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
2916	}
2917
2918	RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2919	{
2920	return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
2921	}
2922
2923	RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2924	{
2925	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
2926	auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2927	::basicBlock->appendInst(cmp);
2928
2929	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2930	auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2931	::basicBlock->appendInst(select);
2932
2933	return RValue<Int4>(V(result));
2934	}
2935
2936	RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2937	{
2938	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
2939	auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2940	::basicBlock->appendInst(cmp);
2941
2942	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2943	auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2944	::basicBlock->appendInst(select);
2945
2946	return RValue<Int4>(V(result));
2947	}
2948
2949	RValue<Int4> RoundInt(RValue<Float4> cast)
2950	{
2951	if(emulateIntrinsics \|\| CPUID::ARM)
2952	{
2953	// Push the fractional part off the mantissa. Accurate up to +/-2^22.
2954	return Int4 ((cast + Float4 (`0x00C00000`)) - Float4 (`0x00C00000`));
2955	}
2956	else
2957	{
2958	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2959	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2960	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2961	auto nearbyint = Ice::InstIntrinsicCall::create(::function, `1`, result, target, intrinsic);
2962	nearbyint->addArg(cast.value);
2963	::basicBlock->appendInst(nearbyint);
2964
2965	return RValue<Int4>(V(result));
2966	}
2967	}
2968
2969	RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
2970	{
2971	if(emulateIntrinsics)
2972	{
2973	Short8 result;
2974	result = Insert(result, SaturateSigned(Extract(x, `0`)), `0`);
2975	result = Insert(result, SaturateSigned(Extract(x, `1`)), `1`);
2976	result = Insert(result, SaturateSigned(Extract(x, `2`)), `2`);
2977	result = Insert(result, SaturateSigned(Extract(x, `3`)), `3`);
2978	result = Insert(result, SaturateSigned(Extract(y, `0`)), `4`);
2979	result = Insert(result, SaturateSigned(Extract(y, `1`)), `5`);
2980	result = Insert(result, SaturateSigned(Extract(y, `2`)), `6`);
2981	result = Insert(result, SaturateSigned(Extract(y, `3`)), `7`);
2982
2983	return result;
2984	}
2985	else
2986	{
2987	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2988	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2989	auto target = ::context->getConstantUndef(Ice::IceType_i32);
2990	auto pack = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
2991	pack->addArg(x.value);
2992	pack->addArg(y.value);
2993	::basicBlock->appendInst(pack);
2994
2995	return RValue<Short8>(V(result));
2996	}
2997	}
2998
2999	RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3000	{
3001	if(emulateIntrinsics \|\| !(CPUID::SSE4_1 \|\| CPUID::ARM))
3002	{
3003	RValue<Int4> sx = As<Int4>(x);
3004	RValue<Int4> bx = (sx & ~(sx >> `31`)) - Int4 (`0x8000`);
3005
3006	RValue<Int4> sy = As<Int4>(y);
3007	RValue<Int4> by = (sy & ~(sy >> `31`)) - Int4 (`0x8000`);
3008
3009	return As<UShort8>(PackSigned(bx, by) + Short8 (`0x8000u`));
3010	}
3011	else
3012	{
3013	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3014	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3015	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3016	auto pack = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
3017	pack->addArg(x.value);
3018	pack->addArg(y.value);
3019	::basicBlock->appendInst(pack);
3020
3021	return RValue<UShort8>(V(result));
3022	}
3023	}
3024
3025	RValue<Int> SignMask(RValue<Int4> x)
3026	{
3027	if(emulateIntrinsics \|\| CPUID::ARM)
3028	{
3029	Int4 xx = (x >> `31`) & Int4 (`0x00000001`, `0x00000002`, `0x00000004`, `0x00000008`);
3030	return Extract(xx, `0`) \| Extract(xx, `1`) \| Extract(xx, `2`) \| Extract(xx, `3`);
3031	}
3032	else
3033	{
3034	Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
3035	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3036	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3037	auto movmsk = Ice::InstIntrinsicCall::create(::function, `1`, result, target, intrinsic);
3038	movmsk->addArg(x.value);
3039	::basicBlock->appendInst(movmsk);
3040
3041	return RValue<Int>(V(result));
3042	}
3043	}
3044
3045	Type *Int4::getType()
3046	{
3047	return T(Ice::IceType_v4i32);
3048	}
3049
3050	UInt4::UInt4(RValue<Float4> cast) : XYZW (this)
3051	{
3052	// Smallest positive value representable in UInt, but not in Int
3053	const unsigned int ustart = `0x80000000u`;
3054	const float ustartf = float(ustart);
3055
3056	// Check if the value can be represented as an Int
3057	Int4 uiValue = CmpNLT(cast, Float4 (ustartf));
3058	// If the value is too large, subtract ustart and re-add it after conversion.
3059	uiValue = (uiValue & As<Int4>(As<UInt4>(Int4 (cast - Float4 (ustartf))) + UInt4 (ustart))) \|
3060	// Otherwise, just convert normally
3061	(~uiValue & Int4 (cast));
3062	// If the value is negative, store 0, otherwise store the result of the conversion
3063	storeValue((~(As<Int4>(cast) >> `31`) & uiValue).value);
3064	}
3065
3066	UInt4::UInt4(RValue<UInt> rhs) : XYZW (this)
3067	{
3068	Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3069
3070	int swizzle[`4`] = {`0`, `0`, `0`, `0`};
3071	Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3072
3073	storeValue(replicate);
3074	}
3075
3076	RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3077	{
3078	if(emulateIntrinsics)
3079	{
3080	UInt4 result;
3081	result = Insert(result, Extract(lhs, `0`) << UInt (rhs), `0`);
3082	result = Insert(result, Extract(lhs, `1`) << UInt (rhs), `1`);
3083	result = Insert(result, Extract(lhs, `2`) << UInt (rhs), `2`);
3084	result = Insert(result, Extract(lhs, `3`) << UInt (rhs), `3`);
3085
3086	return result;
3087	}
3088	else
3089	{
3090	return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3091	}
3092	}
3093
3094	RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3095	{
3096	if(emulateIntrinsics)
3097	{
3098	UInt4 result;
3099	result = Insert(result, Extract(lhs, `0`) >> UInt (rhs), `0`);
3100	result = Insert(result, Extract(lhs, `1`) >> UInt (rhs), `1`);
3101	result = Insert(result, Extract(lhs, `2`) >> UInt (rhs), `2`);
3102	result = Insert(result, Extract(lhs, `3`) >> UInt (rhs), `3`);
3103
3104	return result;
3105	}
3106	else
3107	{
3108	return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3109	}
3110	}
3111
3112	RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3113	{
3114	return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3115	}
3116
3117	RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3118	{
3119	return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3120	}
3121
3122	RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3123	{
3124	return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3125	}
3126
3127	RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3128	{
3129	return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3130	}
3131
3132	RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3133	{
3134	return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3135	}
3136
3137	RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3138	{
3139	return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3140	}
3141
3142	RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3143	{
3144	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3145	auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3146	::basicBlock->appendInst(cmp);
3147
3148	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3149	auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3150	::basicBlock->appendInst(select);
3151
3152	return RValue<UInt4>(V(result));
3153	}
3154
3155	RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3156	{
3157	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3158	auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3159	::basicBlock->appendInst(cmp);
3160
3161	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3162	auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3163	::basicBlock->appendInst(select);
3164
3165	return RValue<UInt4>(V(result));
3166	}
3167
3168	Type *UInt4::getType()
3169	{
3170	return T(Ice::IceType_v4i32);
3171	}
3172
3173	Type *Half::getType()
3174	{
3175	return T(Ice::IceType_i16);
3176	}
3177
3178	RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3179	{
3180	return `1.0f` / x;
3181	}
3182
3183	RValue<Float> RcpSqrt_pp(RValue<Float> x)
3184	{
3185	return Rcp_pp(Sqrt(x));
3186	}
3187
3188	RValue<Float> Sqrt(RValue<Float> x)
3189	{
3190	Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
3191	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3192	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3193	auto sqrt = Ice::InstIntrinsicCall::create(::function, `1`, result, target, intrinsic);
3194	sqrt->addArg(x.value);
3195	::basicBlock->appendInst(sqrt);
3196
3197	return RValue<Float>(V(result));
3198	}
3199
3200	RValue<Float> Round(RValue<Float> x)
3201	{
3202	return Float4 (Round(Float4 (x))).x;
3203	}
3204
3205	RValue<Float> Trunc(RValue<Float> x)
3206	{
3207	return Float4 (Trunc(Float4 (x))).x;
3208	}
3209
3210	RValue<Float> Frac(RValue<Float> x)
3211	{
3212	return Float4 (Frac(Float4 (x))).x;
3213	}
3214
3215	RValue<Float> Floor(RValue<Float> x)
3216	{
3217	return Float4 (Floor(Float4 (x))).x;
3218	}
3219
3220	RValue<Float> Ceil(RValue<Float> x)
3221	{
3222	return Float4 (Ceil(Float4 (x))).x;
3223	}
3224
3225	Type *Float::getType()
3226	{
3227	return T(Ice::IceType_f32);
3228	}
3229
3230	Type *Float2::getType()
3231	{
3232	return T(Type_v2f32);
3233	}
3234
3235	Float4::Float4(RValue<Float> rhs) : XYZW (this)
3236	{
3237	Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3238
3239	int swizzle[`4`] = {`0`, `0`, `0`, `0`};
3240	Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3241
3242	storeValue(replicate);
3243	}
3244
3245	RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3246	{
3247	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3248	auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3249	::basicBlock->appendInst(cmp);
3250
3251	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3252	auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3253	::basicBlock->appendInst(select);
3254
3255	return RValue<Float4>(V(result));
3256	}
3257
3258	RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3259	{
3260	Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3261	auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3262	::basicBlock->appendInst(cmp);
3263
3264	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3265	auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3266	::basicBlock->appendInst(select);
3267
3268	return RValue<Float4>(V(result));
3269	}
3270
3271	RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3272	{
3273	return Float4 (`1.0f`) / x;
3274	}
3275
3276	RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3277	{
3278	return Rcp_pp(Sqrt(x));
3279	}
3280
3281	RValue<Float4> Sqrt(RValue<Float4> x)
3282	{
3283	if(emulateIntrinsics \|\| CPUID::ARM)
3284	{
3285	Float4 result;
3286	result.x = Sqrt(Float (Float4 (x).x));
3287	result.y = Sqrt(Float (Float4 (x).y));
3288	result.z = Sqrt(Float (Float4 (x).z));
3289	result.w = Sqrt(Float (Float4 (x).w));
3290
3291	return result;
3292	}
3293	else
3294	{
3295	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3296	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3297	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3298	auto sqrt = Ice::InstIntrinsicCall::create(::function, `1`, result, target, intrinsic);
3299	sqrt->addArg(x.value);
3300	::basicBlock->appendInst(sqrt);
3301
3302	return RValue<Float4>(V(result));
3303	}
3304	}
3305
3306	RValue<Int> SignMask(RValue<Float4> x)
3307	{
3308	if(emulateIntrinsics \|\| CPUID::ARM)
3309	{
3310	Int4 xx = (As<Int4>(x) >> `31`) & Int4 (`0x00000001`, `0x00000002`, `0x00000004`, `0x00000008`);
3311	return Extract(xx, `0`) \| Extract(xx, `1`) \| Extract(xx, `2`) \| Extract(xx, `3`);
3312	}
3313	else
3314	{
3315	Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
3316	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3317	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3318	auto movmsk = Ice::InstIntrinsicCall::create(::function, `1`, result, target, intrinsic);
3319	movmsk->addArg(x.value);
3320	::basicBlock->appendInst(movmsk);
3321
3322	return RValue<Int>(V(result));
3323	}
3324	}
3325
3326	RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3327	{
3328	return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3329	}
3330
3331	RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3332	{
3333	return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3334	}
3335
3336	RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3337	{
3338	return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3339	}
3340
3341	RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3342	{
3343	return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3344	}
3345
3346	RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3347	{
3348	return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3349	}
3350
3351	RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3352	{
3353	return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
3354	}
3355
3356	RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3357	{
3358	return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3359	}
3360
3361	RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3362	{
3363	return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3364	}
3365
3366	RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3367	{
3368	return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3369	}
3370
3371	RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3372	{
3373	return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3374	}
3375
3376	RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3377	{
3378	return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3379	}
3380
3381	RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3382	{
3383	return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3384	}
3385
3386	RValue<Float4> Round(RValue<Float4> x)
3387	{
3388	if(emulateIntrinsics \|\| CPUID::ARM)
3389	{
3390	// Push the fractional part off the mantissa. Accurate up to +/-2^22.
3391	return (x + Float4 (`0x00C00000`)) - Float4 (`0x00C00000`);
3392	}
3393	else if(CPUID::SSE4_1)
3394	{
3395	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3396	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3397	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3398	auto round = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
3399	round->addArg(x.value);
3400	round->addArg(::context->getConstantInt32(`0`));
3401	::basicBlock->appendInst(round);
3402
3403	return RValue<Float4>(V(result));
3404	}
3405	else
3406	{
3407	return Float4 (RoundInt(x));
3408	}
3409	}
3410
3411	RValue<Float4> Trunc(RValue<Float4> x)
3412	{
3413	if(CPUID::SSE4_1)
3414	{
3415	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3416	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3417	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3418	auto round = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
3419	round->addArg(x.value);
3420	round->addArg(::context->getConstantInt32(`3`));
3421	::basicBlock->appendInst(round);
3422
3423	return RValue<Float4>(V(result));
3424	}
3425	else
3426	{
3427	return Float4 (Int4 (x));
3428	}
3429	}
3430
3431	RValue<Float4> Frac(RValue<Float4> x)
3432	{
3433	Float4 frc;
3434
3435	if(CPUID::SSE4_1)
3436	{
3437	frc = x - Floor(x);
3438	}
3439	else
3440	{
3441	frc = x - Float4 (Int4 (x)); // Signed fractional part.
3442
3443	frc += As<Float4>(As<Int4>(CmpNLE(Float4 (`0.0f`), frc)) & As<Int4>(Float4 (`1`, `1`, `1`, `1`))); // Add 1.0 if negative.
3444	}
3445
3446	// x - floor(x) can be 1.0 for very small negative x.
3447	// Clamp against the value just below 1.0.
3448	return Min(frc, As<Float4>(Int4 (`0x3F7FFFFF`)));
3449	}
3450
3451	RValue<Float4> Floor(RValue<Float4> x)
3452	{
3453	if(CPUID::SSE4_1)
3454	{
3455	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3456	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3457	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3458	auto round = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
3459	round->addArg(x.value);
3460	round->addArg(::context->getConstantInt32(`1`));
3461	::basicBlock->appendInst(round);
3462
3463	return RValue<Float4>(V(result));
3464	}
3465	else
3466	{
3467	return x - Frac(x);
3468	}
3469	}
3470
3471	RValue<Float4> Ceil(RValue<Float4> x)
3472	{
3473	if(CPUID::SSE4_1)
3474	{
3475	Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3476	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3477	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3478	auto round = Ice::InstIntrinsicCall::create(::function, `2`, result, target, intrinsic);
3479	round->addArg(x.value);
3480	round->addArg(::context->getConstantInt32(`2`));
3481	::basicBlock->appendInst(round);
3482
3483	return RValue<Float4>(V(result));
3484	}
3485	else
3486	{
3487	return -Floor(-x);
3488	}
3489	}
3490
3491	Type *Float4::getType()
3492	{
3493	return T(Ice::IceType_v4f32);
3494	}
3495
3496	RValue<Long> Ticks()
3497	{
3498	UNIMPLEMENTED("RValue<Long> Ticks()");
3499	return Long (Int (`0`));
3500	}
3501
3502	RValue<Pointer<Byte>> ConstantPointer(void const * ptr)
3503	{
3504	if (sizeof(void*) == `8`)
3505	{
3506	return RValue<Pointer<Byte>>(V(::context->getConstantInt64(reinterpret_cast<intptr_t>(ptr))));
3507	}
3508	else
3509	{
3510	return RValue<Pointer<Byte>>(V(::context->getConstantInt32(reinterpret_cast<intptr_t>(ptr))));
3511	}
3512	}
3513
3514	RValue<Pointer<Byte>> ConstantData(void const * data, size_t size)
3515	{
3516	// TODO: Try to use Ice::VariableDeclaration::DataInitializer and
3517	// getConstantSym instead of tagging data on the routine.
3518	return ConstantPointer(::routine->addConstantData(data, size));
3519	}
3520
3521	Value* Call(RValue<Pointer<Byte>> fptr, Type* retTy, std::initializer_list<Value> args, std::initializer_list<Type> argTys)
3522	{
3523	// FIXME: This does not currently work on Windows.
3524	Ice::Variable ret = nullptr*;
3525	if (retTy != nullptr)
3526	{
3527	ret = ::function->makeVariable(T(retTy));
3528	}
3529	auto call = Ice::InstCall::create(::function, args.size(), ret, V(fptr.value), false);
3530	for (auto arg : args)
3531	{
3532	call->addArg(V(arg));
3533	}
3534	::basicBlock->appendInst(call);
3535	return V(ret);
3536	}
3537
3538	void Breakpoint()
3539	{
3540	const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3541	auto target = ::context->getConstantUndef(Ice::IceType_i32);
3542	auto trap = Ice::InstIntrinsicCall::create(::function, `0`, nullptr, target, intrinsic);
3543	::basicBlock->appendInst(trap);
3544	}
3545
3546	// Below are functions currently unimplemented for the Subzero backend.
3547	// They are stubbed to satisfy the linker.
3548	void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); }
3549	Value Nucleus::createMaskedLoad(Value ptr, Type elTy, Value mask, unsigned int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createMaskedLoad()"); return nullptr; }
3550	void Nucleus::createMaskedStore(Value ptr, Value val, Value mask, unsigned* int alignment) { UNIMPLEMENTED("Subzero createMaskedStore()"); }
3551	Value Nucleus::createGather(Value base, Type elTy, Value offsets, Value mask, unsigned* int alignment, bool zeroMaskedLanes) { UNIMPLEMENTED("Subzero createGather()"); return nullptr; }
3552	void Nucleus::createScatter(Value base, Value val, Value offsets, Value mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createScatter()"); }
3553	RValue<Float> Exp2(RValue<Float> x) { UNIMPLEMENTED("Subzero Exp2()"); return Float (`0`); }
3554	RValue<Float> Log2(RValue<Float> x) { UNIMPLEMENTED("Subzero Log2()"); return Float (`0`); }
3555	RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4 (`0`); }
3556	RValue<Float4> Cos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cos()"); return Float4 (`0`); }
3557	RValue<Float4> Tan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tan()"); return Float4 (`0`); }
3558	RValue<Float4> Asin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asin()"); return Float4 (`0`); }
3559	RValue<Float4> Acos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acos()"); return Float4 (`0`); }
3560	RValue<Float4> Atan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atan()"); return Float4 (`0`); }
3561	RValue<Float4> Sinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sinh()"); return Float4 (`0`); }
3562	RValue<Float4> Cosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cosh()"); return Float4 (`0`); }
3563	RValue<Float4> Tanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tanh()"); return Float4 (`0`); }
3564	RValue<Float4> Asinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asinh()"); return Float4 (`0`); }
3565	RValue<Float4> Acosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acosh()"); return Float4 (`0`); }
3566	RValue<Float4> Atanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atanh()"); return Float4 (`0`); }
3567	RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Atan2()"); return Float4 (`0`); }
3568	RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Pow()"); return Float4 (`0`); }
3569	RValue<Float4> Exp(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp()"); return Float4 (`0`); }
3570	RValue<Float4> Log(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log()"); return Float4 (`0`); }
3571	RValue<Float4> Exp2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp2()"); return Float4 (`0`); }
3572	RValue<Float4> Log2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log2()"); return Float4 (`0`); }
3573	RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Ctlz()"); return UInt (`0`); }
3574	RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Ctlz()"); return UInt4 (`0`); }
3575	RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Cttz()"); return UInt (`0`); }
3576	RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Cttz()"); return UInt4 (`0`); }
3577
// Intentionally empty: this backend emits nothing for debug locations.
void EmitDebugLocation()
{
}
3579	void EmitDebugVariable(Value* value) {}
// Intentionally empty: this backend has no debug state to flush.
void FlushDebug()
{
}
3581
3582	void Nucleus::createCoroutine(Type YieldType, std::vector<Type> &Params) { UNIMPLEMENTED("createCoroutine"); }
3583	std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char name, const* Config::Edit &cfgEdit / = Config::Edit::None /) { UNIMPLEMENTED("acquireCoroutine"); return nullptr; }
3584	void Nucleus::yield(Value* val) { UNIMPLEMENTED("Yield"); }
3585
3586	}
3587

Browse the source code of engine/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp