Add SwiftShader source to repo

Oct 6 code drop from TransGaming
Review URL: https://chromereviews.googleplex.com/3846015
diff --git a/src/Reactor/Nucleus.cpp b/src/Reactor/Nucleus.cpp
new file mode 100644
index 0000000..3c793a1
--- /dev/null
+++ b/src/Reactor/Nucleus.cpp
@@ -0,0 +1,7063 @@
+// SwiftShader Software Renderer
+//
+// Copyright(c) 2005-2011 TransGaming Inc.
+//
+// All rights reserved. No part of this software may be copied, distributed, transmitted,
+// transcribed, stored in a retrieval system, translated into any human or computer
+// language by any means, or disclosed to third parties without the explicit written
+// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
+// or implied, including but not limited to any patent rights, are granted to you.
+//
+
+#include "Nucleus.hpp"
+
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Constants.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Passmanager.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CFG.h"
+#include "../lib/ExecutionEngine/JIT/JIT.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/CallingConv.h"
+
+#include "MemoryManager.hpp"
+#include "x86.hpp"
+#include "CPUID.hpp"
+#include "Thread.hpp"
+#include "Memory.hpp"
+
+#include <fstream>
+
+extern "C" void LLVMInitializeX86Target();
+extern "C" void LLVMInitializeX86TargetInfo();
+
+extern "C"
+{
+	bool (*CodeAnalystInitialize)() = 0;   // Null until resolved from CAJitNtfyLib.dll in Nucleus::Nucleus()
+	void (*CodeAnalystCompleteJITLog)() = 0;   // Resolved from the export "CAJIT_CompleteJITLog"
+	bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0;   // Null-checked before use in Nucleus::acquireRoutine()
+}
+
+namespace llvm
+{
+	extern bool JITEmitDebugInfo;   // Cleared in Nucleus::Nucleus(); presumably defined inside LLVM's JIT - TODO confirm
+}
+
+namespace sw
+{
+	Optimization optimization[10] = {InstructionCombining, Disabled};   // Pass list read by Nucleus::optimize(); scanning stops at the first Disabled entry
+
+	using namespace llvm;
+
+	MemoryManager *Nucleus::memoryManager = 0;   // Created by each Nucleus constructor and passed to the JIT
+	ExecutionEngine *Nucleus::executionEngine = 0;   // Created by the constructor, deleted by the destructor
+	Builder *Nucleus::builder = 0;   // Created by the first constructor call and reused afterwards
+	LLVMContext *Nucleus::context = 0;   // Created by the first constructor call and reused afterwards
+	Module *Nucleus::module = 0;   // Created by each Nucleus constructor
+	llvm::Function *Nucleus::function = 0;   // Set by setFunction(), reset by the destructor
+
+	class Builder : public IRBuilder<>   // Adds no members; Nucleus keeps its IRBuilder<> behind this type
+	{
+	};
+
+	Routine::Routine(int bufferSize) : bufferSize(bufferSize), dynamic(true)   // Allocates an executable buffer that the destructor will free
+	{
+		void *const block = allocateExecutable(bufferSize);
+		// The entry point starts out at the beginning of the buffer
+		buffer = block;
+		entry = block;
+
+		bindCount = 0;
+		functionSize = bufferSize;   // Updated by MemoryManager::endFunctionBody
+	}
+
+	Routine::Routine(void *memory, int bufferSize, int offset) : bufferSize(bufferSize), functionSize(bufferSize), dynamic(false)   // Wraps existing memory; the destructor will not free it
+	{
+		bindCount = 0;
+
+		entry = memory;
+		buffer = static_cast<unsigned char*>(memory) - offset;   // The buffer begins 'offset' bytes before the entry point
+	}
+
+	Routine::~Routine()   // Frees the buffer only when this object allocated it
+	{
+		if(!dynamic) return;   // Memory was handed in through the (memory, bufferSize, offset) constructor
+
+		// Buffer came from allocateExecutable() in the (bufferSize) constructor
+		deallocateExecutable(buffer, bufferSize);
+	}
+
+	void Routine::setFunctionSize(int functionSize)   // Replaces the function size that the constructor set to the buffer size
+	{
+		this->functionSize = functionSize;
+	}
+
+	const void *Routine::getBuffer()   // Start of the underlying memory block
+	{
+		return buffer;
+	}
+
+	const void *Routine::getEntry()   // Address of the routine's entry point
+	{
+		return entry;
+	}
+
+	int Routine::getBufferSize()   // Size that was passed to the constructor
+	{
+		return bufferSize;
+	}
+
+	int Routine::getFunctionSize()   // Equals the buffer size until setFunctionSize() is called
+	{
+		return functionSize;
+	}
+
+	int Routine::getCodeSize()   // functionSize minus the distance from the buffer start to the entry point
+	{
+		return functionSize - ((uintptr_t)entry - (uintptr_t)buffer);
+	}
+
+	bool Routine::isDynamic()   // True when the buffer was allocated by this object (and is freed by its destructor)
+	{
+		return dynamic;
+	}
+
+	void Routine::bind()   // Adds one reference; pair each call with unbind()
+	{
+		atomicIncrement(&bindCount);
+	}
+
+	void Routine::unbind()   // Drops one reference; the object deletes itself when the count reaches zero
+	{
+		const long remaining = atomicDecrement(&bindCount);
+
+		if(remaining != 0) return;   // Other references are still outstanding
+
+		// Last reference gone: no member may be touched after this point
+		delete this;
+	}
+
+	Nucleus::Nucleus()   // Creates the per-instance module, memory manager and JIT; context and builder are created once and shared
+	{
+		LLVMInitializeX86Target();
+		LLVMInitializeX86TargetInfo();
+		JITEmitDebugInfo = false;
+
+		if(!context)
+		{
+			context = new LLVMContext();   // Kept across Nucleus instances; the destructor does not free it
+		}
+
+		module = new Module("", *context);
+		memoryManager = new MemoryManager();
+
+		#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)   // MSVC defines _M_AMD64/_M_X64 instead of __x86_64__
+			const char *architecture = "x86-64";
+		#else
+			const char *architecture = "x86";
+		#endif
+		SmallVector<std::string, 1> MAttrs;   // Target feature flags matching what CPUID reports for the host
+		MAttrs.push_back(CPUID::supportsMMX()    ? "+mmx"   : "-mmx");
+		MAttrs.push_back(CPUID::supportsCMOV()   ? "+cmov"  : "-cmov");
+		MAttrs.push_back(CPUID::supportsSSE()    ? "+sse"   : "-sse");
+		MAttrs.push_back(CPUID::supportsSSE2()   ? "+sse2"  : "-sse2");
+		MAttrs.push_back(CPUID::supportsSSE3()   ? "+sse3"  : "-sse3");
+		MAttrs.push_back(CPUID::supportsSSSE3()  ? "+ssse3" : "-ssse3");
+		MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
+
+		executionEngine = JIT::createJIT(module, 0, memoryManager, CodeGenOpt::Aggressive, true, CodeModel::Default, architecture, "", MAttrs);
+
+		if(!builder)   // First construction only: create the shared builder and look for the optional profiler library
+		{
+			builder = static_cast<Builder*>(new IRBuilder<>(*context));
+			HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");   // Optional; every hook stays null when the DLL is absent
+			if(CodeAnalyst)
+			{
+				CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
+				CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
+				CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
+				if(CodeAnalystInitialize)   // GetProcAddress yields null when the export is missing
+				{
+					CodeAnalystInitialize();
+				}
+			}
+		}
+	}
+
+	Nucleus::~Nucleus()   // Destroys the JIT; the shared context and builder are left in place for the next instance
+	{
+		delete executionEngine;
+		// The remaining per-instance pointers are only reset here, not deleted
+		executionEngine = 0;
+		module = 0;
+		function = 0;
+		memoryManager = 0;
+	}
+
+	Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)   // Finishes the current function, JIT-compiles it and returns the Routine for the generated code
+	{
+		#if !(defined(_M_AMD64) || defined(_M_X64))
+			x86::emms();   // NOTE(review): presumably emits EMMS before the return - confirm in x86.hpp
+		#endif
+
+		Nucleus::createRetVoid();   // Terminates the function under construction with 'ret void'
+
+		if(false)   // Debug aid: change to true to dump the unoptimized module to llvm-dump-unopt.txt
+		{
+			module->print(raw_fd_ostream("llvm-dump-unopt.txt", std::string()), 0);
+		}
+
+		if(runOptimizations)
+		{
+			optimize();
+		}
+
+		if(false)   // Debug aid: change to true to dump the optimized module to llvm-dump-opt.txt
+		{
+			module->print(raw_fd_ostream("llvm-dump-opt.txt", std::string()), 0);
+		}
+
+		void *entry = executionEngine->getPointerToFunction(function);   // Must run before the routine is fetched from the memory manager below
+
+		Routine *routine = memoryManager->acquireRoutine();   // Presumably the Routine whose buffer received the code - TODO confirm in MemoryManager
+		routine->entry = entry;
+		markExecutable(routine->buffer, routine->bufferSize);
+
+		if(CodeAnalystLogJITCode)   // Non-null only when Nucleus::Nucleus() resolved it from CAJitNtfyLib.dll
+		{
+			CodeAnalystLogJITCode(routine->entry, routine->getCodeSize(), name);
+		}
+
+		return routine;
+	}
+
+	void Nucleus::optimize()   // Runs the passes listed in 'optimization' over the current module
+	{
+		static PassManager *passManager = 0;   // Built on the first call and reused by every later call
+		
+		if(!passManager)
+		{
+			passManager = new PassManager();
+
+			UnsafeFPMath = true;   // Set once, together with the pass list
+		//	NoInfsFPMath = true;
+		//	NoNaNsFPMath = true;
+
+			passManager->add(new TargetData(*executionEngine->getTargetData()));   // NOTE(review): copied from whichever execution engine exists on the first call
+			passManager->add(createScalarReplAggregatesPass());   // Always added, independent of the 'optimization' array
+
+			for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)   // Stops at the first Disabled slot
+			{
+				switch(optimization[pass])
+				{
+				case Disabled:                                                                 break;   // Not reached: excluded by the loop condition
+				case CFGSimplification:    passManager->add(createCFGSimplificationPass());    break;
+				case LICM:                 passManager->add(createLICMPass());                 break;
+				case AggressiveDCE:        passManager->add(createAggressiveDCEPass());        break;
+				case GVN:                  passManager->add(createGVNPass());                  break;
+				case InstructionCombining: passManager->add(createInstructionCombiningPass()); break;
+				case Reassociate:          passManager->add(createReassociatePass());          break;
+				case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break;
+				case SCCP:                 passManager->add(createSCCPPass());                 break;
+				default:
+					assert(false);
+				}
+			}
+		}
+
+		passManager->run(*module);
+	}
+
+	void Nucleus::setFunction(llvm::Function *function)   // Makes 'function' current and points the builder at a freshly created basic block
+	{
+		BasicBlock *const firstBlock = BasicBlock::Create(*context, function);
+		builder->SetInsertPoint(firstBlock);
+		Nucleus::function = function;   // Qualified: the parameter hides the static member
+	}
+
+	Module *Nucleus::getModule()   // Module created by the most recent constructor; null after destruction
+	{
+		return module;
+	}
+
+	Builder *Nucleus::getBuilder()   // Shared builder, created by the first constructor call
+	{
+		return builder;
+	}
+
+	llvm::Function *Nucleus::getFunction()   // Function last passed to setFunction(); null after destruction
+	{
+		return function;
+	}
+
+	llvm::LLVMContext *Nucleus::getContext()   // Shared context, created by the first constructor call
+	{
+		return context;
+	}
+
+	Value *Nucleus::allocateStackVariable(const Type *type, int arraySize)   // Adds an alloca for 'type' to the current function and returns it
+	{
+		// Need to allocate it in the entry block for mem2reg to work
+		llvm::Function *current = getFunction();
+		BasicBlock &entryBlock = current->getEntryBlock();
+
+		// Zero arraySize: pass a null array-size operand instead of a constant
+		Value *elementCount = 0;
+
+		if(arraySize != 0)
+		{
+			elementCount = Nucleus::createConstantInt(arraySize);
+		}
+
+		Instruction *declaration = new AllocaInst(type, elementCount);
+
+		// Inserted at the very front of the entry block, ahead of anything already there
+		entryBlock.getInstList().push_front(declaration);
+
+		return declaration;
+	}
+
+	BasicBlock *Nucleus::createBasicBlock()   // New block created with the context and the current function; the insert point is not moved
+	{
+		return BasicBlock::Create(*context, Nucleus::getFunction());
+	}
+
+	BasicBlock *Nucleus::getInsertBlock()   // Block the builder is currently inserting into
+	{
+		return builder->GetInsertBlock();
+	}
+
+	void Nucleus::setInsertBlock(BasicBlock *basicBlock)   // Redirects subsequent instruction creation to basicBlock
+	{
+		return builder->SetInsertPoint(basicBlock);
+	}
+
+	BasicBlock *Nucleus::getPredecessor(BasicBlock *basicBlock)   // First predecessor only; basicBlock must have at least one (not checked)
+	{
+		return *pred_begin(basicBlock);
+	}
+
+	llvm::Function *Nucleus::createFunction(const llvm::Type *ReturnType, const std::vector<const llvm::Type*> &Params)   // New unnamed function with internal linkage and the C calling convention
+	{
+		llvm::FunctionType *signature = llvm::FunctionType::get(ReturnType, Params, false);   // false: not variadic
+
+		llvm::Function *created = llvm::Function::Create(signature, llvm::GlobalValue::InternalLinkage, "", Nucleus::getModule());
+		created->setCallingConv(llvm::CallingConv::C);
+		return created;
+	}
+
+	llvm::Argument *Nucleus::getArgument(llvm::Function *function, unsigned int index)   // Returns the argument at zero-based position 'index'; index is not range-checked
+	{
+		llvm::Function::arg_iterator argument = function->arg_begin();
+
+		// Step forward one argument at a time until the requested position
+		for(unsigned int remaining = index; remaining != 0; remaining--)
+		{
+			argument++;
+		}
+
+		return &*argument;
+	}
+
+	Value *Nucleus::createRetVoid()   // Emits 'ret void' through the shared builder
+	{
+		return builder->CreateRetVoid();
+	}
+
+	Value *Nucleus::createRet(Value *V)   // Emits a return of V
+	{
+		return builder->CreateRet(V);
+	}
+
+	Value *Nucleus::createBr(BasicBlock *dest)   // Unconditional branch to dest
+	{
+		return builder->CreateBr(dest);
+	}
+
+	Value *Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)   // Conditional branch on cond
+	{
+		return builder->CreateCondBr(cond, ifTrue, ifFalse);
+	}
+
+	Value *Nucleus::createAdd(Value *lhs, Value *rhs)   // Integer add
+	{
+		return builder->CreateAdd(lhs, rhs);
+	}
+
+	Value *Nucleus::createSub(Value *lhs, Value *rhs)   // Integer subtract
+	{
+		return builder->CreateSub(lhs, rhs);
+	}
+
+	Value *Nucleus::createMul(Value *lhs, Value *rhs)   // Integer multiply
+	{
+		return builder->CreateMul(lhs, rhs);
+	}
+
+	Value *Nucleus::createUDiv(Value *lhs, Value *rhs)   // Unsigned integer divide
+	{
+		return builder->CreateUDiv(lhs, rhs);
+	}
+
+	Value *Nucleus::createSDiv(Value *lhs, Value *rhs)   // Signed integer divide
+	{
+		return builder->CreateSDiv(lhs, rhs);
+	}
+
+	Value *Nucleus::createFAdd(Value *lhs, Value *rhs)   // Floating-point add
+	{
+		return builder->CreateFAdd(lhs, rhs);
+	}
+
+	Value *Nucleus::createFSub(Value *lhs, Value *rhs)   // Floating-point subtract
+	{
+		return builder->CreateFSub(lhs, rhs);
+	}
+
+	Value *Nucleus::createFMul(Value *lhs, Value *rhs)   // Floating-point multiply
+	{
+		return builder->CreateFMul(lhs, rhs);
+	}
+
+	Value *Nucleus::createFDiv(Value *lhs, Value *rhs)   // Floating-point divide
+	{
+		return builder->CreateFDiv(lhs, rhs);
+	}
+
+	Value *Nucleus::createURem(Value *lhs, Value *rhs)   // Unsigned integer remainder
+	{
+		return builder->CreateURem(lhs, rhs);
+	}
+
+	Value *Nucleus::createSRem(Value *lhs, Value *rhs)   // Signed integer remainder
+	{
+		return builder->CreateSRem(lhs, rhs);
+	}
+
+	Value *Nucleus::createFRem(Value *lhs, Value *rhs)   // Floating-point remainder
+	{
+		return builder->CreateFRem(lhs, rhs);
+	}
+
+	Value *Nucleus::createShl(Value *lhs, Value *rhs)   // Shift left
+	{
+		return builder->CreateShl(lhs, rhs);
+	}
+
+	Value *Nucleus::createLShr(Value *lhs, Value *rhs)   // Logical shift right (used by the unsigned Byte operator>>)
+	{
+		return builder->CreateLShr(lhs, rhs);
+	}
+
+	Value *Nucleus::createAShr(Value *lhs, Value *rhs)   // Arithmetic shift right (used by the signed SByte operator>>)
+	{
+		return builder->CreateAShr(lhs, rhs);
+	}
+
+	Value *Nucleus::createAnd(Value *lhs, Value *rhs)   // Bitwise and
+	{
+		return builder->CreateAnd(lhs, rhs);
+	}
+
+	Value *Nucleus::createOr(Value *lhs, Value *rhs)   // Bitwise or
+	{
+		return builder->CreateOr(lhs, rhs);
+	}
+
+	Value *Nucleus::createXor(Value *lhs, Value *rhs)   // Bitwise exclusive or
+	{
+		return builder->CreateXor(lhs, rhs);
+	}
+
+	Value *Nucleus::createNeg(Value *V)   // Integer negation
+	{
+		return builder->CreateNeg(V);
+	}
+
+	Value *Nucleus::createFNeg(Value *V)   // Floating-point negation
+	{
+		return builder->CreateFNeg(V);
+	}
+
+	Value *Nucleus::createNot(Value *V)   // Bitwise complement; also implements logical '!' for Bool
+	{
+		return builder->CreateNot(V);
+	}
+
+	Value *Nucleus::createLoad(Value *ptr, bool isVolatile, unsigned int align)   // Constructs the LoadInst directly, then inserts it through the builder
+	{
+		return builder->Insert(new LoadInst(ptr, isVolatile, align));
+	}
+
+	Value *Nucleus::createStore(Value *value, Value *ptr, bool isVolatile, unsigned int align)   // Constructs the StoreInst directly, then inserts it through the builder
+	{
+		return builder->Insert(new StoreInst(value, ptr, isVolatile, align));
+	}
+
+	Value *Nucleus::createGEP(Value *ptr, Value *index)   // Element address computation with a single index
+	{
+		return builder->CreateGEP(ptr, index);
+	}
+
+	Value *Nucleus::createTrunc(Value *V, const Type *destType)   // Integer truncation to destType
+	{
+		return builder->CreateTrunc(V, destType);
+	}
+
+	Value *Nucleus::createZExt(Value *V, const Type *destType)   // Zero extension to destType
+	{
+		return builder->CreateZExt(V, destType);
+	}
+
+	Value *Nucleus::createSExt(Value *V, const Type *destType)   // Sign extension to destType
+	{
+		return builder->CreateSExt(V, destType);
+	}
+
+	Value *Nucleus::createFPToUI(Value *V, const Type *destType)   // Floating point to unsigned integer
+	{
+		return builder->CreateFPToUI(V, destType);
+	}
+
+	Value *Nucleus::createFPToSI(Value *V, const Type *destType)   // Floating point to signed integer
+	{
+		return builder->CreateFPToSI(V, destType);
+	}
+
+	Value *Nucleus::createUIToFP(Value *V, const Type *destType)   // Unsigned integer to floating point
+	{
+		return builder->CreateUIToFP(V, destType);
+	}
+
+	Value *Nucleus::createSIToFP(Value *V, const Type *destType)   // Signed integer to floating point
+	{
+		return builder->CreateSIToFP(V, destType);
+	}
+
+	Value *Nucleus::createFPTrunc(Value *V, const Type *destType)   // Floating-point truncation to destType
+	{
+		return builder->CreateFPTrunc(V, destType);
+	}
+
+	Value *Nucleus::createFPExt(Value *V, const Type *destType)   // Floating-point extension to destType
+	{
+		return builder->CreateFPExt(V, destType);
+	}
+
+	Value *Nucleus::createPtrToInt(Value *V, const Type *destType)   // Pointer to integer
+	{
+		return builder->CreatePtrToInt(V, destType);
+	}
+
+	Value *Nucleus::createIntToPtr(Value *V, const Type *destType)   // Integer to pointer
+	{
+		return builder->CreateIntToPtr(V, destType);
+	}
+
+	Value *Nucleus::createBitCast(Value *V, const Type *destType)   // Bit cast to destType
+	{
+		return builder->CreateBitCast(V, destType);
+	}
+
+	Value *Nucleus::createIntCast(Value *V, const Type *destType, bool isSigned)   // Integer cast; isSigned is forwarded unchanged to the builder
+	{
+		return builder->CreateIntCast(V, destType, isSigned);
+	}
+
+	Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)   // icmp eq
+	{
+		return builder->CreateICmpEQ(lhs, rhs);
+	}
+
+	Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)   // icmp ne
+	{
+		return builder->CreateICmpNE(lhs, rhs);
+	}
+
+	Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)   // icmp ugt (unsigned >)
+	{
+		return builder->CreateICmpUGT(lhs, rhs);
+	}
+
+	Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)   // icmp uge (unsigned >=)
+	{
+		return builder->CreateICmpUGE(lhs, rhs);
+	}
+
+	Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)   // icmp ult (unsigned <)
+	{
+		return builder->CreateICmpULT(lhs, rhs);
+	}
+
+	Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)   // icmp ule (unsigned <=)
+	{
+		return builder->CreateICmpULE(lhs, rhs);
+	}
+
+	Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)   // icmp sgt (signed >)
+	{
+		return builder->CreateICmpSGT(lhs, rhs);
+	}
+
+	Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)   // icmp sge (signed >=)
+	{
+		return builder->CreateICmpSGE(lhs, rhs);
+	}
+
+	Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)   // icmp slt (signed <)
+	{
+		return builder->CreateICmpSLT(lhs, rhs);
+	}
+
+	Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)   // icmp sle (signed <=)
+	{
+		return builder->CreateICmpSLE(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)   // fcmp oeq (ordered, equal)
+	{
+		return builder->CreateFCmpOEQ(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)   // fcmp ogt (ordered, greater than)
+	{
+		return builder->CreateFCmpOGT(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)   // fcmp oge (ordered, greater or equal)
+	{
+		return builder->CreateFCmpOGE(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)   // fcmp olt (ordered, less than)
+	{
+		return builder->CreateFCmpOLT(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)   // fcmp ole (ordered, less or equal)
+	{
+		return builder->CreateFCmpOLE(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)   // fcmp one (ordered, not equal)
+	{
+		return builder->CreateFCmpONE(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)   // fcmp ord (both operands ordered)
+	{
+		return builder->CreateFCmpORD(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)   // fcmp uno (operands unordered)
+	{
+		return builder->CreateFCmpUNO(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)   // fcmp ueq (unordered or equal)
+	{
+		return builder->CreateFCmpUEQ(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)   // fcmp ugt (unordered or greater than)
+	{
+		return builder->CreateFCmpUGT(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)   // fcmp uge (unordered or greater or equal)
+	{
+		return builder->CreateFCmpUGE(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)   // fcmp ult (unordered or less than)
+	{
+		return builder->CreateFCmpULT(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)   // fcmp ule (unordered or less or equal)
+	{
+		return builder->CreateFCmpULE(lhs, rhs);
+	}
+
+	Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)   // fcmp une (unordered or not equal)
+	{
+		return builder->CreateFCmpUNE(lhs, rhs);
+	}
+
+	Value *Nucleus::createCall(Value *callee)   // Call with no arguments
+	{
+		return builder->CreateCall(callee);
+	}
+
+	Value *Nucleus::createCall(Value *callee, Value *arg)   // Call with one argument
+	{
+		return builder->CreateCall(callee, arg);
+	}
+
+	Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2)   // Call with two arguments
+	{
+		return builder->CreateCall2(callee, arg1, arg2);
+	}
+
+	Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2, Value *arg3)   // Call with three arguments
+	{
+		return builder->CreateCall3(callee, arg1, arg2, arg3);
+	}
+
+	Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2, Value *arg3, Value *arg4)   // Call with four arguments
+	{
+		return builder->CreateCall4(callee, arg1, arg2, arg3, arg4);
+	}
+
+	Value *Nucleus::createExtractElement(Value *vector, int index)   // Reads element 'index'; the index is passed as a 32-bit constant
+	{
+		return builder->CreateExtractElement(vector, createConstantInt(index));
+	}
+
+	Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)   // Vector with element 'index' replaced; the index is passed as a 32-bit constant
+	{
+		return builder->CreateInsertElement(vector, element, createConstantInt(index));
+	}
+
+	Value *Nucleus::createShuffleVector(Value *V1, Value *V2, Value *mask)   // shufflevector of V1 and V2 under 'mask'
+	{
+		return builder->CreateShuffleVector(V1, V2, mask);
+	}
+
+	Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)   // select on condition C
+	{
+		return builder->CreateSelect(C, ifTrue, ifFalse);
+	}
+
+	Value *Nucleus::createSwitch(llvm::Value *V, llvm::BasicBlock *Dest, unsigned NumCases)   // Switch on V with default target Dest; cases are added with addSwitchCase()
+	{
+		return builder->CreateSwitch(V, Dest, NumCases);
+	}
+
+	void Nucleus::addSwitchCase(llvm::Value *Switch, int Case, llvm::BasicBlock *Branch)   // Switch must really be a SwitchInst: the static_cast is unchecked
+	{
+		static_cast<SwitchInst*>(Switch)->addCase(Nucleus::createConstantInt(Case), Branch);
+	}
+
+	Value *Nucleus::createUnreachable()   // Emits 'unreachable'
+	{
+		return builder->CreateUnreachable();
+	}
+
+	Value *Nucleus::createSwizzle(Value *val, unsigned char select)   // Reorders four elements of val; result element i is chosen by bits 2i+1..2i of select
+	{
+		Constant *lanes[4];
+
+		for(int lane = 0; lane < 4; lane++)
+		{
+			lanes[lane] = Nucleus::createConstantInt((select >> (2 * lane)) & 0x03);
+		}
+
+		Value *undefined = UndefValue::get(val->getType());   // Second shuffle operand; every index above is in 0..3
+		return Nucleus::createShuffleVector(val, undefined, Nucleus::createConstantVector(lanes, 4));
+	}
+
+	Value *Nucleus::createMask(Value *lhs, Value *rhs, unsigned char select)   // Shuffle of lhs and rhs: elements named in select use indices 4..7, the others 0..3
+	{
+		bool takeRhs[4] = {false, false, false, false};
+
+		// Each 2-bit field of select names one element position
+		for(int shift = 0; shift < 8; shift += 2)
+		{
+			takeRhs[(select >> shift) & 0x03] = true;
+		}
+
+		Constant *lanes[4];
+
+		for(int lane = 0; lane < 4; lane++)
+		{
+			lanes[lane] = Nucleus::createConstantInt(takeRhs[lane] ? lane + 4 : lane);   // Same position, offset by 4 when marked
+		}
+
+		return Nucleus::createShuffleVector(lhs, rhs, Nucleus::createConstantVector(lanes, 4));
+	}
+
+	const llvm::GlobalValue *Nucleus::getGlobalValueAtAddress(void *Addr)   // Forwards the lookup to the execution engine
+	{
+		return executionEngine->getGlobalValueAtAddress(Addr);
+	}
+
+	void Nucleus::addGlobalMapping(const llvm::GlobalValue *GV, void *Addr)   // Registers Addr for GV with the execution engine
+	{
+		executionEngine->addGlobalMapping(GV, Addr);
+	}
+
+	llvm::GlobalValue *Nucleus::createGlobalValue(const llvm::Type *Ty, bool isConstant, unsigned int Align)   // New unnamed global of type Ty with external linkage and alignment Align
+	{
+		llvm::GlobalVariable *variable = new llvm::GlobalVariable(Ty, isConstant, llvm::GlobalValue::ExternalLinkage, 0, "", false);
+
+		variable->setAlignment(Align);
+		return variable;
+	}
+
+	llvm::Type *Nucleus::getPointerType(const llvm::Type *ElementType)   // Pointer to ElementType in address space 0
+	{
+		return llvm::PointerType::get(ElementType, 0);
+	}
+
+	llvm::Constant *Nucleus::createNullValue(const llvm::Type *Ty)   // Null value constant of type Ty
+	{
+		return llvm::Constant::getNullValue(Ty);
+	}
+
+	llvm::ConstantInt *Nucleus::createConstantLong(int64_t i)   // 64-bit constant, created as signed
+	{
+		return llvm::ConstantInt::get(Type::getInt64Ty(*context), i, true);
+	}
+
+	llvm::ConstantInt *Nucleus::createConstantInt(int i)   // 32-bit constant, created as signed
+	{
+		return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, true);
+	}
+
+	llvm::ConstantInt *Nucleus::createConstantInt(unsigned int i)   // 32-bit constant, created as unsigned
+	{
+		return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, false);
+	}
+
+	llvm::ConstantInt *Nucleus::createConstantBool(bool b)   // 1-bit constant
+	{
+		return llvm::ConstantInt::get(Type::getInt1Ty(*context), b);
+	}
+
+	llvm::ConstantInt *Nucleus::createConstantByte(signed char i)   // 8-bit constant, created as signed
+	{
+		return llvm::ConstantInt::get(Type::getInt8Ty(*context), i, true);
+	}
+
+	llvm::ConstantInt *Nucleus::createConstantByte(unsigned char i)   // 8-bit constant, created as unsigned
+	{
+		return llvm::ConstantInt::get(Type::getInt8Ty(*context), i, false);
+	}
+
+	llvm::ConstantInt *Nucleus::createConstantShort(short i)   // 16-bit constant, created as signed
+	{
+		return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, true);
+	}
+
+	llvm::ConstantInt *Nucleus::createConstantShort(unsigned short i)   // 16-bit constant, created as unsigned
+	{
+		return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, false);
+	}
+
+	llvm::Constant *Nucleus::createConstantFloat(float x)   // Floating-point constant of the type returned by Float::getType()
+	{
+		return ConstantFP::get(Float::getType(), x);
+	}
+
+	llvm::Value *Nucleus::createNullPointer(const llvm::Type *Ty)   // Null pointer whose pointee type is Ty (address space 0)
+	{
+		return llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0));
+	}
+
+	llvm::Value *Nucleus::createConstantVector(Constant* const* Vals, unsigned NumVals)   // Constant vector built from the NumVals constants at Vals
+	{
+		return llvm::ConstantVector::get(Vals, NumVals);
+	}
+
+	const Type *Void::getType()   // LLVM void type in the Nucleus context
+	{
+		return Type::getVoidTy(*Nucleus::getContext());
+	}
+
+	Bool::Bool(Argument *argument)   // Stack variable initialized from a function argument
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(argument, address);
+	}
+
+	Bool::Bool()   // Stack variable with no initial store
+	{
+		address = Nucleus::allocateStackVariable(getType());
+	}
+
+	Bool::Bool(bool x)   // Stack variable initialized with the constant x
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createConstantBool(x), address);
+	}
+
+	Bool::Bool(const RValue<Bool> &rhs)   // Stack variable initialized from an rvalue
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	Bool::Bool(const Bool &rhs)   // New stack variable; emits a load of rhs followed by a store
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *value = Nucleus::createLoad(rhs.address);
+		Nucleus::createStore(value, address);
+	}
+
+	RValue<Bool> Bool::operator=(const RValue<Bool> &rhs) const   // Stores rhs into this variable and returns rhs
+	{
+		Nucleus::createStore(rhs.value, address);
+
+		return rhs;
+	}
+
+	RValue<Bool> Bool::operator=(const Bool &rhs) const   // Loads rhs, stores it here and returns the loaded value
+	{
+		Value *value = Nucleus::createLoad(rhs.address);
+		Nucleus::createStore(value, address);
+
+		return RValue<Bool>(value);
+	}
+
+	RValue<Pointer<Bool>> Bool::operator&()   // The variable's stack address wrapped as a Pointer<Bool> rvalue
+	{
+		return RValue<Pointer<Bool>>(address);
+	}
+
+	RValue<Bool> operator!(const RValue<Bool> &val)   // Negation via createNot
+	{
+		return RValue<Bool>(Nucleus::createNot(val.value));
+	}
+
+	RValue<Bool> operator&&(const RValue<Bool> &lhs, const RValue<Bool> &rhs)   // Emits an 'and' of two already-built values: no short-circuit evaluation
+	{
+		return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
+	}
+
+	RValue<Bool> operator||(const RValue<Bool> &lhs, const RValue<Bool> &rhs)   // Emits an 'or' of two already-built values: no short-circuit evaluation
+	{
+		return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
+	}
+
+	Bool *Bool::getThis()   // Plain C++ address of this object; operator& is overloaded to return the generated-code address instead
+	{
+		return this;
+	}
+
+	const Type *Bool::getType()   // 1-bit integer type in the Nucleus context
+	{
+		return Type::getInt1Ty(*Nucleus::getContext());
+	}
+
+	Byte::Byte(Argument *argument)   // Stack variable initialized from a function argument
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(argument, address);
+	}
+
+	Byte::Byte(const RValue<Int> &cast)   // Stack variable initialized with an Int truncated to 8 bits
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
+
+		Nucleus::createStore(integer, address);
+	}
+
+	Byte::Byte()   // Stack variable with no initial store
+	{
+		address = Nucleus::allocateStackVariable(getType());
+	}
+
+	Byte::Byte(int x)   // x is narrowed to unsigned char before becoming the initial constant
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createConstantByte((unsigned char)x), address);
+	}
+
+	Byte::Byte(unsigned char x)   // Stack variable initialized with the constant x
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createConstantByte(x), address);
+	}
+
+	Byte::Byte(const RValue<Byte> &rhs)   // Stack variable initialized from an rvalue
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	Byte::Byte(const Byte &rhs)   // New stack variable; emits a load of rhs followed by a store
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *value = Nucleus::createLoad(rhs.address);
+		Nucleus::createStore(value, address);
+	}
+
+	RValue<Byte> Byte::operator=(const RValue<Byte> &rhs) const   // Stores rhs into this variable and returns rhs
+	{
+		Nucleus::createStore(rhs.value, address);
+
+		return rhs;
+	}
+
+	RValue<Byte> Byte::operator=(const Byte &rhs) const   // Loads rhs, stores it here and returns the loaded value
+	{
+		Value *value = Nucleus::createLoad(rhs.address);
+		Nucleus::createStore(value, address);
+
+		return RValue<Byte>(value);
+	}
+
+	RValue<Pointer<Byte>> Byte::operator&()   // The variable's stack address wrapped as a Pointer<Byte> rvalue
+	{
+		return RValue<Pointer<Byte>>(address);
+	}
+
+	RValue<Byte> operator+(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Integer add
+	{
+		return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator-(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Integer subtract
+	{
+		return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator*(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Integer multiply
+	{
+		return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator/(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Unsigned divide (Byte is the unsigned 8-bit type)
+	{
+		return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator%(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Unsigned remainder
+	{
+		return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator&(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Bitwise and
+	{
+		return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator|(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Bitwise or
+	{
+		return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator^(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Bitwise exclusive or
+	{
+		return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator<<(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Shift left
+	{
+		return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator>>(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Logical shift right
+	{
+		return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
+	}
+
+	RValue<Byte> operator+=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs + rhs back into lhs and returns the stored value
+	{
+		return lhs = lhs + rhs;
+	}
+
+	RValue<Byte> operator-=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs - rhs back into lhs
+	{
+		return lhs = lhs - rhs;
+	}
+
+	RValue<Byte> operator*=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs * rhs back into lhs
+	{
+		return lhs = lhs * rhs;
+	}
+
+	RValue<Byte> operator/=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs / rhs (unsigned) back into lhs
+	{
+		return lhs = lhs / rhs;
+	}
+
+	RValue<Byte> operator%=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs % rhs (unsigned) back into lhs
+	{
+		return lhs = lhs % rhs;
+	}
+
+	RValue<Byte> operator&=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs & rhs back into lhs
+	{
+		return lhs = lhs & rhs;
+	}
+
+	RValue<Byte> operator|=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs | rhs back into lhs
+	{
+		return lhs = lhs | rhs;
+	}
+
+	RValue<Byte> operator^=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs ^ rhs back into lhs
+	{
+		return lhs = lhs ^ rhs;
+	}
+
+	RValue<Byte> operator<<=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs << rhs back into lhs
+	{
+		return lhs = lhs << rhs;
+	}
+
+	RValue<Byte> operator>>=(const Byte &lhs, const RValue<Byte> &rhs)   // Stores lhs >> rhs (logical) back into lhs
+	{
+		return lhs = lhs >> rhs;
+	}
+
+	RValue<Byte> operator+(const RValue<Byte> &val)   // Unary plus: returns val unchanged, emits nothing
+	{
+		return val;
+	}
+
+	RValue<Byte> operator-(const RValue<Byte> &val)   // Integer negation
+	{
+		return RValue<Byte>(Nucleus::createNeg(val.value));
+	}
+
+	RValue<Byte> operator~(const RValue<Byte> &val)   // Bitwise complement
+	{
+		return RValue<Byte>(Nucleus::createNot(val.value));
+	}
+
+	RValue<Byte> operator++(const Byte &val, int)   // Post-increment
+	{
+		const RValue<Byte> previous = val;   // Value before the increment; this is what gets returned
+
+		Value *const one = Nucleus::createConstantByte(static_cast<unsigned char>(1));
+		Nucleus::createStore(Nucleus::createAdd(previous.value, one), val.address);
+
+		return previous;
+	}
+
+	const Byte &operator++(const Byte &val)   // Pre-increment
+	{
+		Value *const current = Nucleus::createLoad(val.address);
+		Value *const one = Nucleus::createConstantByte(static_cast<unsigned char>(1));
+		Nucleus::createStore(Nucleus::createAdd(current, one), val.address);
+		return val;
+	}
+
+	RValue<Byte> operator--(const Byte &val, int)   // Post-decrement
+	{
+		const RValue<Byte> previous = val;   // Value before the decrement; this is what gets returned
+
+		Value *const one = Nucleus::createConstantByte(static_cast<unsigned char>(1));
+		Nucleus::createStore(Nucleus::createSub(previous.value, one), val.address);
+
+		return previous;
+	}
+
+	const Byte &operator--(const Byte &val)   // Pre-decrement
+	{
+		Value *const current = Nucleus::createLoad(val.address);
+		Value *const one = Nucleus::createConstantByte(static_cast<unsigned char>(1));
+		Nucleus::createStore(Nucleus::createSub(current, one), val.address);
+		return val;
+	}
+
+	RValue<Bool> operator<(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Unsigned less-than
+	{
+		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
+	}
+
+	RValue<Bool> operator<=(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Unsigned less-or-equal
+	{
+		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
+	}
+
+	RValue<Bool> operator>(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Unsigned greater-than
+	{
+		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
+	}
+
+	RValue<Bool> operator>=(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Unsigned greater-or-equal
+	{
+		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
+	}
+
+	RValue<Bool> operator!=(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Not equal
+	{
+		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
+	}
+
+	RValue<Bool> operator==(const RValue<Byte> &lhs, const RValue<Byte> &rhs)   // Equal
+	{
+		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
+	}
+
+	Byte *Byte::getThis()   // Plain C++ address of this object; operator& is overloaded to return the generated-code address instead
+	{
+		return this;
+	}
+
+	const Type *Byte::getType()   // 8-bit integer type in the Nucleus context
+	{
+		return Type::getInt8Ty(*Nucleus::getContext());
+	}
+
+	SByte::SByte(Argument *argument)   // Stack variable initialized from a function argument
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(argument, address);
+	}
+
+	SByte::SByte()   // Stack variable with no initial store
+	{
+		address = Nucleus::allocateStackVariable(getType());
+	}
+
+	SByte::SByte(signed char x)   // Stack variable initialized with the signed constant x
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createConstantByte(x), address);
+	}
+
+	SByte::SByte(const RValue<SByte> &rhs)   // Stack variable initialized from an rvalue
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	SByte::SByte(const SByte &rhs)   // New stack variable; emits a load of rhs followed by a store
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *value = Nucleus::createLoad(rhs.address);
+		Nucleus::createStore(value, address);
+	}
+
+	RValue<SByte> SByte::operator=(const RValue<SByte> &rhs) const   // Stores rhs into this variable and returns rhs
+	{
+		Nucleus::createStore(rhs.value, address);
+
+		return rhs;
+	}
+
+	RValue<SByte> SByte::operator=(const SByte &rhs) const   // Loads rhs, stores it here and returns the loaded value
+	{
+		Value *value = Nucleus::createLoad(rhs.address);
+		Nucleus::createStore(value, address);
+
+		return RValue<SByte>(value);
+	}
+
+	RValue<Pointer<SByte>> SByte::operator&()   // The variable's stack address wrapped as a Pointer<SByte> rvalue
+	{
+		return RValue<Pointer<SByte>>(address);
+	}
+
+	RValue<SByte> operator+(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
+	}
+
+	RValue<SByte> operator-(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
+	}
+
+	RValue<SByte> operator*(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
+	}
+
+	// Signed division of lhs by rhs (Nucleus::createSDiv).
+	RValue<SByte> operator/(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *quotient = Nucleus::createSDiv(lhs.value, rhs.value);
+
+		return RValue<SByte>(quotient);
+	}
+
+	// Signed remainder of lhs divided by rhs (Nucleus::createSRem).
+	RValue<SByte> operator%(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *remainder = Nucleus::createSRem(lhs.value, rhs.value);
+
+		return RValue<SByte>(remainder);
+	}
+
+	// Bitwise AND of two SByte r-values (Nucleus::createAnd).
+	RValue<SByte> operator&(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<SByte>(masked);
+	}
+
+	// Bitwise OR of two SByte r-values (Nucleus::createOr).
+	RValue<SByte> operator|(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<SByte>(merged);
+	}
+
+	// Bitwise XOR of two SByte r-values (Nucleus::createXor).
+	RValue<SByte> operator^(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<SByte>(toggled);
+	}
+
+	// Shifts lhs left by rhs (Nucleus::createShl).
+	RValue<SByte> operator<<(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *shifted = Nucleus::createShl(lhs.value, rhs.value);
+
+		return RValue<SByte>(shifted);
+	}
+
+	// Shifts lhs right by rhs using the arithmetic shift Nucleus::createAShr.
+	RValue<SByte> operator>>(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *shifted = Nucleus::createAShr(lhs.value, rhs.value);
+
+		return RValue<SByte>(shifted);
+	}
+
+	// Add-assign: stores lhs + rhs into lhs and returns the stored value.
+	RValue<SByte> operator+=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> sum = lhs + rhs;
+
+		return lhs = sum;
+	}
+
+	// Subtract-assign: stores lhs - rhs into lhs and returns the stored value.
+	RValue<SByte> operator-=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> difference = lhs - rhs;
+
+		return lhs = difference;
+	}
+
+	// Multiply-assign: stores lhs * rhs into lhs and returns the stored value.
+	RValue<SByte> operator*=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> product = lhs * rhs;
+
+		return lhs = product;
+	}
+
+	// Divide-assign: stores lhs / rhs into lhs and returns the stored value.
+	RValue<SByte> operator/=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> quotient = lhs / rhs;
+
+		return lhs = quotient;
+	}
+
+	// Remainder-assign: stores lhs % rhs into lhs and returns the stored value.
+	RValue<SByte> operator%=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> remainder = lhs % rhs;
+
+		return lhs = remainder;
+	}
+
+	// AND-assign: stores lhs & rhs into lhs and returns the stored value.
+	RValue<SByte> operator&=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> masked = lhs & rhs;
+
+		return lhs = masked;
+	}
+
+	// OR-assign: stores lhs | rhs into lhs and returns the stored value.
+	RValue<SByte> operator|=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> merged = lhs | rhs;
+
+		return lhs = merged;
+	}
+
+	// XOR-assign: stores lhs ^ rhs into lhs and returns the stored value.
+	RValue<SByte> operator^=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> toggled = lhs ^ rhs;
+
+		return lhs = toggled;
+	}
+
+	// Shift-left-assign: stores lhs << rhs into lhs and returns the stored value.
+	RValue<SByte> operator<<=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> shifted = lhs << rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shift-right-assign: stores lhs >> rhs into lhs and returns the stored value.
+	RValue<SByte> operator>>=(const SByte &lhs, const RValue<SByte> &rhs)
+	{
+		RValue<SByte> shifted = lhs >> rhs;
+
+		return lhs = shifted;
+	}
+
+	// Unary plus: returns the operand unchanged.
+	RValue<SByte> operator+(const RValue<SByte> &val)
+	{
+		return RValue<SByte>(val);
+	}
+
+	// Unary minus: negates the operand (Nucleus::createNeg).
+	RValue<SByte> operator-(const RValue<SByte> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<SByte>(negated);
+	}
+
+	// Bitwise complement of the operand (Nucleus::createNot).
+	RValue<SByte> operator~(const RValue<SByte> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<SByte>(inverted);
+	}
+
+	// Stores val + 1 back into val and returns the value val held beforehand.
+	RValue<SByte> operator++(const SByte &val, int)   // Post-increment
+	{
+		RValue<SByte> previous = val;
+
+		Value *one = Nucleus::createConstantByte((signed char)1);
+		Value *incremented = Nucleus::createAdd(previous.value, one);
+		Nucleus::createStore(incremented, val.address);
+
+		return previous;
+	}
+
+	// Loads val, adds one, stores the result back and returns val itself.
+	const SByte &operator++(const SByte &val)   // Pre-increment
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *one = Nucleus::createConstantByte((signed char)1);
+
+		Nucleus::createStore(Nucleus::createAdd(current, one), val.address);
+
+		return val;
+	}
+
+	// Stores val - 1 back into val and returns the value val held beforehand.
+	RValue<SByte> operator--(const SByte &val, int)   // Post-decrement
+	{
+		RValue<SByte> previous = val;
+
+		Value *one = Nucleus::createConstantByte((signed char)1);
+		Value *decremented = Nucleus::createSub(previous.value, one);
+		Nucleus::createStore(decremented, val.address);
+
+		return previous;
+	}
+
+	// Loads val, subtracts one, stores the result back and returns val itself.
+	const SByte &operator--(const SByte &val)   // Pre-decrement
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *one = Nucleus::createConstantByte((signed char)1);
+
+		Nucleus::createStore(Nucleus::createSub(current, one), val.address);
+
+		return val;
+	}
+
+	// Signed less-than comparison (Nucleus::createICmpSLT).
+	RValue<Bool> operator<(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *isLess = Nucleus::createICmpSLT(lhs.value, rhs.value);
+
+		return RValue<Bool>(isLess);
+	}
+
+	// Signed less-than-or-equal comparison (Nucleus::createICmpSLE).
+	RValue<Bool> operator<=(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *isLessOrEqual = Nucleus::createICmpSLE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isLessOrEqual);
+	}
+
+	// Signed greater-than comparison (Nucleus::createICmpSGT).
+	RValue<Bool> operator>(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *isGreater = Nucleus::createICmpSGT(lhs.value, rhs.value);
+
+		return RValue<Bool>(isGreater);
+	}
+
+	// Signed greater-than-or-equal comparison (Nucleus::createICmpSGE).
+	RValue<Bool> operator>=(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *isGreaterOrEqual = Nucleus::createICmpSGE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isGreaterOrEqual);
+	}
+
+	// Inequality comparison of two SByte r-values (Nucleus::createICmpNE).
+	RValue<Bool> operator!=(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *isDifferent = Nucleus::createICmpNE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isDifferent);
+	}
+
+	// Equality comparison of two SByte r-values (Nucleus::createICmpEQ).
+	RValue<Bool> operator==(const RValue<SByte> &lhs, const RValue<SByte> &rhs)
+	{
+		Value *isEqual = Nucleus::createICmpEQ(lhs.value, rhs.value);
+
+		return RValue<Bool>(isEqual);
+	}
+
+	// Returns a pointer to this SByte object.
+	SByte *SByte::getThis()
+	{
+		SByte *self = this;
+
+		return self;
+	}
+
+	// Returns the type used to represent SByte: the 8-bit integer type of the Nucleus context.
+	const Type *SByte::getType()
+	{
+		const Type *int8 = Type::getInt8Ty(*Nucleus::getContext());
+
+		return int8;
+	}
+
+	// Creates a Short stack variable and initializes it from a function argument.
+	Short::Short(Argument *argument)
+	{
+		const Type *type = getType();
+		address = Nucleus::allocateStackVariable(type);
+
+		Nucleus::createStore(argument, address);
+	}
+
+	// Creates a Short from an Int r-value by truncating it to Short's type.
+	Short::Short(const RValue<Int> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *truncated = Nucleus::createTrunc(cast.value, Short::getType());
+		Nucleus::createStore(truncated, address);
+	}
+
+	// Creates a Short stack variable; only storage is allocated, no value is stored.
+	Short::Short()
+	{
+		const Type *type = getType();
+
+		address = Nucleus::allocateStackVariable(type);
+	}
+
+	// Creates a Short stack variable initialized with the constant x.
+	Short::Short(short x)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *constant = Nucleus::createConstantShort(x);
+		Nucleus::createStore(constant, address);
+	}
+
+	// Creates a Short stack variable initialized with the r-value rhs.
+	Short::Short(const RValue<Short> &rhs)
+	{
+		const Type *type = getType();
+		address = Nucleus::allocateStackVariable(type);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: loads the value of rhs and stores it into newly allocated stack storage.
+	Short::Short(const Short &rhs)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores the r-value rhs into this variable and returns rhs.
+	RValue<Short> Short::operator=(const RValue<Short> &rhs) const
+	{
+		Value *stored = rhs.value;
+		Nucleus::createStore(stored, address);
+
+		return rhs;
+	}
+
+	// Copies the value of rhs into this variable; returns the loaded value as an r-value.
+	RValue<Short> Short::operator=(const Short &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<Short>(loaded);
+	}
+
+	// Address-of: wraps this variable's storage address as a Pointer<Short> r-value.
+	RValue<Pointer<Short>> Short::operator&()
+	{
+		return RValue<Pointer<Short>>(this->address);
+	}
+
+	// Adds two Short r-values (Nucleus::createAdd).
+	RValue<Short> operator+(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<Short>(sum);
+	}
+
+	// Subtracts rhs from lhs (Nucleus::createSub).
+	RValue<Short> operator-(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<Short>(difference);
+	}
+
+	// Multiplies two Short r-values (Nucleus::createMul).
+	RValue<Short> operator*(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<Short>(product);
+	}
+
+	// Signed division of lhs by rhs (Nucleus::createSDiv).
+	RValue<Short> operator/(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *quotient = Nucleus::createSDiv(lhs.value, rhs.value);
+
+		return RValue<Short>(quotient);
+	}
+
+	// Signed remainder of lhs divided by rhs (Nucleus::createSRem).
+	RValue<Short> operator%(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *remainder = Nucleus::createSRem(lhs.value, rhs.value);
+
+		return RValue<Short>(remainder);
+	}
+
+	// Bitwise AND of two Short r-values (Nucleus::createAnd).
+	RValue<Short> operator&(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<Short>(masked);
+	}
+
+	// Bitwise OR of two Short r-values (Nucleus::createOr).
+	RValue<Short> operator|(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<Short>(merged);
+	}
+
+	// Bitwise XOR of two Short r-values (Nucleus::createXor).
+	RValue<Short> operator^(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<Short>(toggled);
+	}
+
+	// Shifts lhs left by rhs (Nucleus::createShl).
+	RValue<Short> operator<<(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *shifted = Nucleus::createShl(lhs.value, rhs.value);
+
+		return RValue<Short>(shifted);
+	}
+
+	// Shifts lhs right by rhs using the arithmetic shift Nucleus::createAShr.
+	RValue<Short> operator>>(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *shifted = Nucleus::createAShr(lhs.value, rhs.value);
+
+		return RValue<Short>(shifted);
+	}
+
+	// Add-assign: stores lhs + rhs into lhs and returns the stored value.
+	RValue<Short> operator+=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> sum = lhs + rhs;
+
+		return lhs = sum;
+	}
+
+	// Subtract-assign: stores lhs - rhs into lhs and returns the stored value.
+	RValue<Short> operator-=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> difference = lhs - rhs;
+
+		return lhs = difference;
+	}
+
+	// Multiply-assign: stores lhs * rhs into lhs and returns the stored value.
+	RValue<Short> operator*=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> product = lhs * rhs;
+
+		return lhs = product;
+	}
+
+	// Divide-assign: stores lhs / rhs into lhs and returns the stored value.
+	RValue<Short> operator/=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> quotient = lhs / rhs;
+
+		return lhs = quotient;
+	}
+
+	// Remainder-assign: stores lhs % rhs into lhs and returns the stored value.
+	RValue<Short> operator%=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> remainder = lhs % rhs;
+
+		return lhs = remainder;
+	}
+
+	// AND-assign: stores lhs & rhs into lhs and returns the stored value.
+	RValue<Short> operator&=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> masked = lhs & rhs;
+
+		return lhs = masked;
+	}
+
+	// OR-assign: stores lhs | rhs into lhs and returns the stored value.
+	RValue<Short> operator|=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> merged = lhs | rhs;
+
+		return lhs = merged;
+	}
+
+	// XOR-assign: stores lhs ^ rhs into lhs and returns the stored value.
+	RValue<Short> operator^=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> toggled = lhs ^ rhs;
+
+		return lhs = toggled;
+	}
+
+	// Shift-left-assign: stores lhs << rhs into lhs and returns the stored value.
+	RValue<Short> operator<<=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> shifted = lhs << rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shift-right-assign: stores lhs >> rhs into lhs and returns the stored value.
+	RValue<Short> operator>>=(const Short &lhs, const RValue<Short> &rhs)
+	{
+		RValue<Short> shifted = lhs >> rhs;
+
+		return lhs = shifted;
+	}
+
+	// Unary plus: returns the operand unchanged.
+	RValue<Short> operator+(const RValue<Short> &val)
+	{
+		return RValue<Short>(val);
+	}
+
+	// Unary minus: negates the operand (Nucleus::createNeg).
+	RValue<Short> operator-(const RValue<Short> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<Short>(negated);
+	}
+
+	// Bitwise complement of the operand (Nucleus::createNot).
+	RValue<Short> operator~(const RValue<Short> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<Short>(inverted);
+	}
+
+	// Stores val + 1 back into val and returns the value val held beforehand.
+	RValue<Short> operator++(const Short &val, int)   // Post-increment
+	{
+		RValue<Short> previous = val;
+
+		Value *one = Nucleus::createConstantShort((short)1);
+		Value *incremented = Nucleus::createAdd(previous.value, one);
+		Nucleus::createStore(incremented, val.address);
+
+		return previous;
+	}
+
+	// Loads val, adds one, stores the result back and returns val itself.
+	const Short &operator++(const Short &val)   // Pre-increment
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *one = Nucleus::createConstantShort((short)1);
+
+		Nucleus::createStore(Nucleus::createAdd(current, one), val.address);
+
+		return val;
+	}
+
+	// Stores val - 1 back into val and returns the value val held beforehand.
+	RValue<Short> operator--(const Short &val, int)   // Post-decrement
+	{
+		RValue<Short> previous = val;
+
+		Value *one = Nucleus::createConstantShort((short)1);
+		Value *decremented = Nucleus::createSub(previous.value, one);
+		Nucleus::createStore(decremented, val.address);
+
+		return previous;
+	}
+
+	// Loads val, subtracts one, stores the result back and returns val itself.
+	const Short &operator--(const Short &val)   // Pre-decrement
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *one = Nucleus::createConstantShort((short)1);
+
+		Nucleus::createStore(Nucleus::createSub(current, one), val.address);
+
+		return val;
+	}
+
+	// Signed less-than comparison (Nucleus::createICmpSLT).
+	RValue<Bool> operator<(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *isLess = Nucleus::createICmpSLT(lhs.value, rhs.value);
+
+		return RValue<Bool>(isLess);
+	}
+
+	// Signed less-than-or-equal comparison (Nucleus::createICmpSLE).
+	RValue<Bool> operator<=(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *isLessOrEqual = Nucleus::createICmpSLE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isLessOrEqual);
+	}
+
+	// Signed greater-than comparison (Nucleus::createICmpSGT).
+	RValue<Bool> operator>(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *isGreater = Nucleus::createICmpSGT(lhs.value, rhs.value);
+
+		return RValue<Bool>(isGreater);
+	}
+
+	// Signed greater-than-or-equal comparison (Nucleus::createICmpSGE).
+	RValue<Bool> operator>=(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *isGreaterOrEqual = Nucleus::createICmpSGE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isGreaterOrEqual);
+	}
+
+	// Inequality comparison of two Short r-values (Nucleus::createICmpNE).
+	RValue<Bool> operator!=(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *isDifferent = Nucleus::createICmpNE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isDifferent);
+	}
+
+	// Equality comparison of two Short r-values (Nucleus::createICmpEQ).
+	RValue<Bool> operator==(const RValue<Short> &lhs, const RValue<Short> &rhs)
+	{
+		Value *isEqual = Nucleus::createICmpEQ(lhs.value, rhs.value);
+
+		return RValue<Bool>(isEqual);
+	}
+
+	// Returns a pointer to this Short object.
+	Short *Short::getThis()
+	{
+		Short *self = this;
+
+		return self;
+	}
+
+	// Returns the type used to represent Short: the 16-bit integer type of the Nucleus context.
+	const Type *Short::getType()
+	{
+		const Type *int16 = Type::getInt16Ty(*Nucleus::getContext());
+
+		return int16;
+	}
+
+	// Creates a UShort stack variable and initializes it from a function argument.
+	UShort::UShort(Argument *argument)
+	{
+		const Type *type = getType();
+		address = Nucleus::allocateStackVariable(type);
+
+		Nucleus::createStore(argument, address);
+	}
+
+	// Creates a UShort stack variable; only storage is allocated, no value is stored.
+	UShort::UShort()
+	{
+		const Type *type = getType();
+
+		address = Nucleus::allocateStackVariable(type);
+	}
+
+	// Creates a UShort stack variable initialized with the constant x.
+	UShort::UShort(unsigned short x)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *constant = Nucleus::createConstantShort(x);
+		Nucleus::createStore(constant, address);
+	}
+
+	// Creates a UShort stack variable initialized with the r-value rhs.
+	UShort::UShort(const RValue<UShort> &rhs)
+	{
+		const Type *type = getType();
+		address = Nucleus::allocateStackVariable(type);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: loads the value of rhs and stores it into newly allocated stack storage.
+	UShort::UShort(const UShort &rhs)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores the r-value rhs into this variable and returns rhs.
+	RValue<UShort> UShort::operator=(const RValue<UShort> &rhs) const
+	{
+		Value *stored = rhs.value;
+		Nucleus::createStore(stored, address);
+
+		return rhs;
+	}
+
+	// Copies the value of rhs into this variable; returns the loaded value as an r-value.
+	RValue<UShort> UShort::operator=(const UShort &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<UShort>(loaded);
+	}
+
+	// Address-of: wraps this variable's storage address as a Pointer<UShort> r-value.
+	RValue<Pointer<UShort>> UShort::operator&()
+	{
+		return RValue<Pointer<UShort>>(this->address);
+	}
+
+	// Adds two UShort r-values (Nucleus::createAdd).
+	RValue<UShort> operator+(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<UShort>(sum);
+	}
+
+	// Subtracts rhs from lhs (Nucleus::createSub).
+	RValue<UShort> operator-(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<UShort>(difference);
+	}
+
+	// Multiplies two UShort r-values (Nucleus::createMul).
+	RValue<UShort> operator*(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<UShort>(product);
+	}
+
+	// Unsigned division of lhs by rhs (Nucleus::createUDiv).
+	RValue<UShort> operator/(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *quotient = Nucleus::createUDiv(lhs.value, rhs.value);
+
+		return RValue<UShort>(quotient);
+	}
+
+	// Unsigned remainder of lhs divided by rhs (Nucleus::createURem).
+	RValue<UShort> operator%(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *remainder = Nucleus::createURem(lhs.value, rhs.value);
+
+		return RValue<UShort>(remainder);
+	}
+
+	// Bitwise AND of two UShort r-values (Nucleus::createAnd).
+	RValue<UShort> operator&(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<UShort>(masked);
+	}
+
+	// Bitwise OR of two UShort r-values (Nucleus::createOr).
+	RValue<UShort> operator|(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<UShort>(merged);
+	}
+
+	// Bitwise XOR of two UShort r-values (Nucleus::createXor).
+	RValue<UShort> operator^(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<UShort>(toggled);
+	}
+
+	// Shifts lhs left by rhs (Nucleus::createShl).
+	RValue<UShort> operator<<(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *shifted = Nucleus::createShl(lhs.value, rhs.value);
+
+		return RValue<UShort>(shifted);
+	}
+
+	// Shifts lhs right by rhs using the logical shift Nucleus::createLShr.
+	RValue<UShort> operator>>(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *shifted = Nucleus::createLShr(lhs.value, rhs.value);
+
+		return RValue<UShort>(shifted);
+	}
+
+	// Add-assign: stores lhs + rhs into lhs and returns the stored value.
+	RValue<UShort> operator+=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> sum = lhs + rhs;
+
+		return lhs = sum;
+	}
+
+	// Subtract-assign: stores lhs - rhs into lhs and returns the stored value.
+	RValue<UShort> operator-=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> difference = lhs - rhs;
+
+		return lhs = difference;
+	}
+
+	// Multiply-assign: stores lhs * rhs into lhs and returns the stored value.
+	RValue<UShort> operator*=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> product = lhs * rhs;
+
+		return lhs = product;
+	}
+
+	// Divide-assign: stores lhs / rhs into lhs and returns the stored value.
+	RValue<UShort> operator/=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> quotient = lhs / rhs;
+
+		return lhs = quotient;
+	}
+
+	// Remainder-assign: stores lhs % rhs into lhs and returns the stored value.
+	RValue<UShort> operator%=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> remainder = lhs % rhs;
+
+		return lhs = remainder;
+	}
+
+	// AND-assign: stores lhs & rhs into lhs and returns the stored value.
+	RValue<UShort> operator&=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> masked = lhs & rhs;
+
+		return lhs = masked;
+	}
+
+	// OR-assign: stores lhs | rhs into lhs and returns the stored value.
+	RValue<UShort> operator|=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> merged = lhs | rhs;
+
+		return lhs = merged;
+	}
+
+	// XOR-assign: stores lhs ^ rhs into lhs and returns the stored value.
+	RValue<UShort> operator^=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> toggled = lhs ^ rhs;
+
+		return lhs = toggled;
+	}
+
+	// Shift-left-assign: stores lhs << rhs into lhs and returns the stored value.
+	RValue<UShort> operator<<=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> shifted = lhs << rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shift-right-assign: stores lhs >> rhs into lhs and returns the stored value.
+	RValue<UShort> operator>>=(const UShort &lhs, const RValue<UShort> &rhs)
+	{
+		RValue<UShort> shifted = lhs >> rhs;
+
+		return lhs = shifted;
+	}
+
+	// Unary plus: returns the operand unchanged.
+	RValue<UShort> operator+(const RValue<UShort> &val)
+	{
+		return RValue<UShort>(val);
+	}
+
+	// Unary minus: negates the operand (Nucleus::createNeg).
+	RValue<UShort> operator-(const RValue<UShort> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<UShort>(negated);
+	}
+
+	// Bitwise complement of the operand (Nucleus::createNot).
+	RValue<UShort> operator~(const RValue<UShort> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<UShort>(inverted);
+	}
+
+	// Stores val + 1 back into val and returns the value val held beforehand.
+	RValue<UShort> operator++(const UShort &val, int)   // Post-increment
+	{
+		RValue<UShort> previous = val;
+
+		Value *one = Nucleus::createConstantShort((unsigned short)1);
+		Value *incremented = Nucleus::createAdd(previous.value, one);
+		Nucleus::createStore(incremented, val.address);
+
+		return previous;
+	}
+
+	// Loads val, adds one, stores the result back and returns val itself.
+	const UShort &operator++(const UShort &val)   // Pre-increment
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *one = Nucleus::createConstantShort((unsigned short)1);
+
+		Nucleus::createStore(Nucleus::createAdd(current, one), val.address);
+
+		return val;
+	}
+
+	// Stores val - 1 back into val and returns the value val held beforehand.
+	RValue<UShort> operator--(const UShort &val, int)   // Post-decrement
+	{
+		RValue<UShort> previous = val;
+
+		Value *one = Nucleus::createConstantShort((unsigned short)1);
+		Value *decremented = Nucleus::createSub(previous.value, one);
+		Nucleus::createStore(decremented, val.address);
+
+		return previous;
+	}
+
+	// Loads val, subtracts one, stores the result back and returns val itself.
+	const UShort &operator--(const UShort &val)   // Pre-decrement
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *one = Nucleus::createConstantShort((unsigned short)1);
+
+		Nucleus::createStore(Nucleus::createSub(current, one), val.address);
+
+		return val;
+	}
+
+	// Unsigned less-than comparison (Nucleus::createICmpULT).
+	RValue<Bool> operator<(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *isLess = Nucleus::createICmpULT(lhs.value, rhs.value);
+
+		return RValue<Bool>(isLess);
+	}
+
+	// Unsigned less-than-or-equal comparison (Nucleus::createICmpULE).
+	RValue<Bool> operator<=(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *isLessOrEqual = Nucleus::createICmpULE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isLessOrEqual);
+	}
+
+	// Unsigned greater-than comparison (Nucleus::createICmpUGT).
+	RValue<Bool> operator>(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *isGreater = Nucleus::createICmpUGT(lhs.value, rhs.value);
+
+		return RValue<Bool>(isGreater);
+	}
+
+	// Unsigned greater-than-or-equal comparison (Nucleus::createICmpUGE).
+	RValue<Bool> operator>=(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *isGreaterOrEqual = Nucleus::createICmpUGE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isGreaterOrEqual);
+	}
+
+	// Inequality comparison of two UShort r-values (Nucleus::createICmpNE).
+	RValue<Bool> operator!=(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *isDifferent = Nucleus::createICmpNE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isDifferent);
+	}
+
+	// Equality comparison of two UShort r-values (Nucleus::createICmpEQ).
+	RValue<Bool> operator==(const RValue<UShort> &lhs, const RValue<UShort> &rhs)
+	{
+		Value *isEqual = Nucleus::createICmpEQ(lhs.value, rhs.value);
+
+		return RValue<Bool>(isEqual);
+	}
+
+	// Returns a pointer to this UShort object.
+	UShort *UShort::getThis()
+	{
+		UShort *self = this;
+
+		return self;
+	}
+
+	// Returns the type used to represent UShort: the 16-bit integer type of the Nucleus context.
+	const Type *UShort::getType()
+	{
+		const Type *int16 = Type::getInt16Ty(*Nucleus::getContext());
+
+		return int16;
+	}
+
+	// Returns a pointer to this Byte4 object.
+	Byte4 *Byte4::getThis()
+	{
+		Byte4 *self = this;
+
+		return self;
+	}
+
+	// Returns the type used to represent Byte4. The 4 x Byte vector form is
+	// compiled out (#if 0); UInt's type is returned instead.
+	const Type *Byte4::getType()
+	{
+		#if 0
+			return VectorType::get(Byte::getType(), 4);
+		#else
+			const Type *packed = UInt::getType();   // FIXME: LLVM doesn't manipulate it as one 32-bit block
+
+			return packed;
+		#endif
+	}
+
+	// Returns a pointer to this SByte4 object.
+	SByte4 *SByte4::getThis()
+	{
+		SByte4 *self = this;
+
+		return self;
+	}
+
+	// Returns the type used to represent SByte4. The 4 x SByte vector form is
+	// compiled out (#if 0); Int's type is returned instead.
+	const Type *SByte4::getType()
+	{
+		#if 0
+			return VectorType::get(SByte::getType(), 4);
+		#else
+			const Type *packed = Int::getType();   // FIXME: LLVM doesn't manipulate it as one 32-bit block
+
+			return packed;
+		#endif
+	}
+
+	// Creates a Byte8 stack variable; only storage is allocated, no value is stored.
+	Byte8::Byte8()
+	{
+	//	xyzw.parent = this;
+		const Type *type = getType();
+
+		address = Nucleus::allocateStackVariable(type);
+	}
+
+	// Creates a Byte8 stack variable whose eight elements are the constants x0..x7,
+	// with x0 in element 0.
+	Byte8::Byte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		const byte elements[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
+		Constant *constantVector[8];
+
+		for(int i = 0; i < 8; i++)
+		{
+			constantVector[i] = Nucleus::createConstantByte(elements[i]);
+		}
+
+		Nucleus::createStore(Nucleus::createConstantVector(constantVector, 8), address);
+	}
+
+	// Creates a Byte8 stack variable from the eight bytes of x: element i receives
+	// bits 8*i .. 8*i+7 of x.
+	Byte8::Byte8(int64_t x)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Constant *constantVector[8];
+
+		for(int i = 0; i < 8; i++)
+		{
+			constantVector[i] = Nucleus::createConstantByte((unsigned char)(x >> (8 * i)));
+		}
+
+		Nucleus::createStore(Nucleus::createConstantVector(constantVector, 8), address);
+	}
+
+	// Creates a Byte8 stack variable initialized with the r-value rhs.
+	Byte8::Byte8(const RValue<Byte8> &rhs)
+	{
+	//	xyzw.parent = this;
+		const Type *type = getType();
+		address = Nucleus::allocateStackVariable(type);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: loads the value of rhs and stores it into newly allocated stack storage.
+	Byte8::Byte8(const Byte8 &rhs)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores the r-value rhs into this variable and returns rhs.
+	RValue<Byte8> Byte8::operator=(const RValue<Byte8> &rhs) const
+	{
+		Value *stored = rhs.value;
+		Nucleus::createStore(stored, address);
+
+		return rhs;
+	}
+
+	// Copies the value of rhs into this variable; returns the loaded value as an r-value.
+	RValue<Byte8> Byte8::operator=(const Byte8 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<Byte8>(loaded);
+	}
+
+	// Adds two Byte8 r-values (Nucleus::createAdd).
+	RValue<Byte8> operator+(const RValue<Byte8> &lhs, const RValue<Byte8> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<Byte8>(sum);
+	}
+
+	// Subtracts rhs from lhs (Nucleus::createSub).
+	RValue<Byte8> operator-(const RValue<Byte8> &lhs, const RValue<Byte8> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<Byte8>(difference);
+	}
+
+	// Multiplies two Byte8 r-values (Nucleus::createMul).
+	RValue<Byte8> operator*(const RValue<Byte8> &lhs, const RValue<Byte8> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<Byte8>(product);
+	}
+
+	// Unsigned division of lhs by rhs (Nucleus::createUDiv).
+	RValue<Byte8> operator/(const RValue<Byte8> &lhs, const RValue<Byte8> &rhs)
+	{
+		Value *quotient = Nucleus::createUDiv(lhs.value, rhs.value);
+
+		return RValue<Byte8>(quotient);
+	}
+
+	// Unsigned remainder of lhs divided by rhs (Nucleus::createURem).
+	RValue<Byte8> operator%(const RValue<Byte8> &lhs, const RValue<Byte8> &rhs)
+	{
+		Value *remainder = Nucleus::createURem(lhs.value, rhs.value);
+
+		return RValue<Byte8>(remainder);
+	}
+
+	// Bitwise AND of two Byte8 r-values (Nucleus::createAnd).
+	RValue<Byte8> operator&(const RValue<Byte8> &lhs, const RValue<Byte8> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<Byte8>(masked);
+	}
+
+	// Bitwise OR of two Byte8 r-values (Nucleus::createOr).
+	RValue<Byte8> operator|(const RValue<Byte8> &lhs, const RValue<Byte8> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<Byte8>(merged);
+	}
+
+	// Bitwise XOR of two Byte8 r-values (Nucleus::createXor).
+	RValue<Byte8> operator^(const RValue<Byte8> &lhs, const RValue<Byte8> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<Byte8>(toggled);
+	}
+
+//	RValue<Byte8> operator<<(const RValue<Byte8> &lhs, unsigned char rhs)
+//	{
+//		return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
+//	}
+
+//	RValue<Byte8> operator>>(const RValue<Byte8> &lhs, unsigned char rhs)
+//	{
+//		return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
+//	}
+
+	// Add-assign: stores lhs + rhs into lhs and returns the stored value.
+	RValue<Byte8> operator+=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+	{
+		RValue<Byte8> sum = lhs + rhs;
+
+		return lhs = sum;
+	}
+
+	// Subtract-assign: stores lhs - rhs into lhs and returns the stored value.
+	RValue<Byte8> operator-=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+	{
+		RValue<Byte8> difference = lhs - rhs;
+
+		return lhs = difference;
+	}
+
+	// Multiply-assign: stores lhs * rhs into lhs and returns the stored value.
+	RValue<Byte8> operator*=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+	{
+		RValue<Byte8> product = lhs * rhs;
+
+		return lhs = product;
+	}
+
+	// Divide-assign: stores lhs / rhs into lhs and returns the stored value.
+	RValue<Byte8> operator/=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+	{
+		RValue<Byte8> quotient = lhs / rhs;
+
+		return lhs = quotient;
+	}
+
+	// Remainder-assign: stores lhs % rhs into lhs and returns the stored value.
+	RValue<Byte8> operator%=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+	{
+		RValue<Byte8> remainder = lhs % rhs;
+
+		return lhs = remainder;
+	}
+
+	// AND-assign: stores lhs & rhs into lhs and returns the stored value.
+	RValue<Byte8> operator&=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+	{
+		RValue<Byte8> masked = lhs & rhs;
+
+		return lhs = masked;
+	}
+
+	// OR-assign: stores lhs | rhs into lhs and returns the stored value.
+	RValue<Byte8> operator|=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+	{
+		RValue<Byte8> merged = lhs | rhs;
+
+		return lhs = merged;
+	}
+
+	// XOR-assign: stores lhs ^ rhs into lhs and returns the stored value.
+	RValue<Byte8> operator^=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+	{
+		RValue<Byte8> toggled = lhs ^ rhs;
+
+		return lhs = toggled;
+	}
+
+//	RValue<Byte8> operator<<=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+//	{
+//		return lhs = lhs << rhs;
+//	}
+
+//	RValue<Byte8> operator>>=(const Byte8 &lhs, const RValue<Byte8> &rhs)
+//	{
+//		return lhs = lhs >> rhs;
+//	}
+
+	// Unary plus: returns the operand unchanged.
+	RValue<Byte8> operator+(const RValue<Byte8> &val)
+	{
+		return RValue<Byte8>(val);
+	}
+
+	// Unary minus: negates the operand (Nucleus::createNeg).
+	RValue<Byte8> operator-(const RValue<Byte8> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<Byte8>(negated);
+	}
+
+	// Bitwise complement of the operand (Nucleus::createNot).
+	RValue<Byte8> operator~(const RValue<Byte8> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<Byte8>(inverted);
+	}
+
+	// Saturating add of two Byte8 values; forwards to x86::paddusb.
+	RValue<Byte8> AddSat(const RValue<Byte8> &x, const RValue<Byte8> &y)
+	{
+		RValue<Byte8> saturated = x86::paddusb(x, y);
+
+		return saturated;
+	}
+	
+	// Saturating subtract of two Byte8 values; forwards to x86::psubusb.
+	RValue<Byte8> SubSat(const RValue<Byte8> &x, const RValue<Byte8> &y)
+	{
+		RValue<Byte8> saturated = x86::psubusb(x, y);
+
+		return saturated;
+	}
+
+	// Shuffles x and y with the index pattern 0, 8, 1, 9, 2, 10, 3, 11 and bit-casts
+	// the result to Short4. Assuming createShuffleVector follows LLVM shufflevector
+	// indexing (0-7 select from x, 8-15 from y), this interleaves the low four bytes
+	// of x and y.
+	RValue<Short4> UnpackLow(const RValue<Byte8> &x, const RValue<Byte8> &y)
+	{
+		Constant *shuffle[8];
+
+		for(int i = 0; i < 4; i++)
+		{
+			shuffle[2 * i + 0] = Nucleus::createConstantInt(i);
+			shuffle[2 * i + 1] = Nucleus::createConstantInt(i + 8);
+		}
+
+		Value *interleaved = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
+		Value *reinterpreted = Nucleus::createBitCast(interleaved, Short4::getType());
+
+		return RValue<Short4>(reinterpreted);
+	}
+	
+	// Shuffles x and y with the index pattern 4, 12, 5, 13, 6, 14, 7, 15 and bit-casts
+	// the result to Short4. Assuming createShuffleVector follows LLVM shufflevector
+	// indexing (0-7 select from x, 8-15 from y), this interleaves the high four bytes
+	// of x and y.
+	RValue<Short4> UnpackHigh(const RValue<Byte8> &x, const RValue<Byte8> &y)
+	{
+		Constant *shuffle[8];
+
+		for(int i = 0; i < 4; i++)
+		{
+			shuffle[2 * i + 0] = Nucleus::createConstantInt(i + 4);
+			shuffle[2 * i + 1] = Nucleus::createConstantInt(i + 12);
+		}
+
+		Value *interleaved = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
+		Value *reinterpreted = Nucleus::createBitCast(interleaved, Short4::getType());
+
+		return RValue<Short4>(reinterpreted);
+	}
+
+	// Returns the Int produced by x86::pmovmskb for x.
+	RValue<Int> SignMask(const RValue<Byte8> &x)
+	{
+		RValue<Int> mask = x86::pmovmskb(x);
+
+		return mask;
+	}
+
+//	RValue<Byte8> CmpGT(const RValue<Byte8> &x, const RValue<Byte8> &y)
+//	{
+//		return x86::pcmpgtb(x, y);   // FIXME: Signedness
+//	}
+	
+	// Compares x and y for equality; forwards to x86::pcmpeqb.
+	RValue<Byte8> CmpEQ(const RValue<Byte8> &x, const RValue<Byte8> &y)
+	{
+		RValue<Byte8> equalMask = x86::pcmpeqb(x, y);
+
+		return equalMask;
+	}
+
+	// Returns a pointer to this Byte8 object.
+	Byte8 *Byte8::getThis()
+	{
+		Byte8 *self = this;
+
+		return self;
+	}
+
+	// Returns the type used to represent Byte8: a vector of 8 elements of Byte's type.
+	const Type *Byte8::getType()
+	{
+		const Type *elementType = Byte::getType();
+
+		return VectorType::get(elementType, 8);
+	}
+
+	// Creates an SByte8 stack variable; only storage is allocated, no value is stored.
+	SByte8::SByte8()
+	{
+	//	xyzw.parent = this;
+		const Type *type = getType();
+
+		address = Nucleus::allocateStackVariable(type);
+	}
+
+	// Creates an SByte8 stack variable whose eight elements are the constants x0..x7,
+	// with x0 in element 0.
+	SByte8::SByte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		const byte elements[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
+		Constant *constantVector[8];
+
+		for(int i = 0; i < 8; i++)
+		{
+			constantVector[i] = Nucleus::createConstantByte(elements[i]);
+		}
+
+		Nucleus::createStore(Nucleus::createConstantVector(constantVector, 8), address);
+	}
+
+	// Creates an SByte8 stack variable from the eight bytes of x: element i receives
+	// bits 8*i .. 8*i+7 of x.
+	SByte8::SByte8(int64_t x)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Constant *constantVector[8];
+
+		for(int i = 0; i < 8; i++)
+		{
+			constantVector[i] = Nucleus::createConstantByte((unsigned char)(x >> (8 * i)));
+		}
+
+		Nucleus::createStore(Nucleus::createConstantVector(constantVector, 8), address);
+	}
+
+	// Creates an SByte8 stack variable initialized with the r-value rhs.
+	SByte8::SByte8(const RValue<SByte8> &rhs)
+	{
+	//	xyzw.parent = this;
+		const Type *type = getType();
+		address = Nucleus::allocateStackVariable(type);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: loads the value of rhs and stores it into newly allocated stack storage.
+	SByte8::SByte8(const SByte8 &rhs)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores the r-value rhs into this variable and returns rhs.
+	RValue<SByte8> SByte8::operator=(const RValue<SByte8> &rhs) const
+	{
+		Value *stored = rhs.value;
+		Nucleus::createStore(stored, address);
+
+		return rhs;
+	}
+
+	// Copies the value of rhs into this variable; returns the loaded value as an r-value.
+	RValue<SByte8> SByte8::operator=(const SByte8 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<SByte8>(loaded);
+	}
+
+	// Adds two SByte8 r-values (Nucleus::createAdd).
+	RValue<SByte8> operator+(const RValue<SByte8> &lhs, const RValue<SByte8> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<SByte8>(sum);
+	}
+
+	// Subtracts rhs from lhs (Nucleus::createSub).
+	RValue<SByte8> operator-(const RValue<SByte8> &lhs, const RValue<SByte8> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<SByte8>(difference);
+	}
+
+	// Multiplies two SByte8 r-values (Nucleus::createMul).
+	RValue<SByte8> operator*(const RValue<SByte8> &lhs, const RValue<SByte8> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<SByte8>(product);
+	}
+
+	// Signed division of lhs by rhs (Nucleus::createSDiv).
+	RValue<SByte8> operator/(const RValue<SByte8> &lhs, const RValue<SByte8> &rhs)
+	{
+		Value *quotient = Nucleus::createSDiv(lhs.value, rhs.value);
+
+		return RValue<SByte8>(quotient);
+	}
+
+	// Signed remainder of lhs divided by rhs (Nucleus::createSRem).
+	RValue<SByte8> operator%(const RValue<SByte8> &lhs, const RValue<SByte8> &rhs)
+	{
+		Value *remainder = Nucleus::createSRem(lhs.value, rhs.value);
+
+		return RValue<SByte8>(remainder);
+	}
+
+	// Bitwise AND of two SByte8 r-values (Nucleus::createAnd).
+	RValue<SByte8> operator&(const RValue<SByte8> &lhs, const RValue<SByte8> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<SByte8>(masked);
+	}
+
+	// Bitwise OR of two SByte8 r-values (Nucleus::createOr).
+	RValue<SByte8> operator|(const RValue<SByte8> &lhs, const RValue<SByte8> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<SByte8>(merged);
+	}
+
+	// Bitwise XOR of two SByte8 r-values (Nucleus::createXor).
+	RValue<SByte8> operator^(const RValue<SByte8> &lhs, const RValue<SByte8> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<SByte8>(toggled);
+	}
+
+//	RValue<SByte8> operator<<(const RValue<SByte8> &lhs, unsigned char rhs)
+//	{
+//		return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
+//	}
+
+//	RValue<SByte8> operator>>(const RValue<SByte8> &lhs, unsigned char rhs)
+//	{
+//		return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
+//	}
+
+	// Add-assign: stores lhs + rhs into lhs and returns the stored value.
+	RValue<SByte8> operator+=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+	{
+		RValue<SByte8> sum = lhs + rhs;
+
+		return lhs = sum;
+	}
+
+	// Subtract-assign: stores lhs - rhs into lhs and returns the stored value.
+	RValue<SByte8> operator-=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+	{
+		RValue<SByte8> difference = lhs - rhs;
+
+		return lhs = difference;
+	}
+
+	// Multiply-assign: stores lhs * rhs into lhs and returns the stored value.
+	RValue<SByte8> operator*=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+	{
+		RValue<SByte8> product = lhs * rhs;
+
+		return lhs = product;
+	}
+
+	// Divide-assign: stores lhs / rhs into lhs and returns the stored value.
+	RValue<SByte8> operator/=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+	{
+		RValue<SByte8> quotient = lhs / rhs;
+
+		return lhs = quotient;
+	}
+
+	// Remainder-assign: stores lhs % rhs into lhs and returns the stored value.
+	RValue<SByte8> operator%=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+	{
+		RValue<SByte8> remainder = lhs % rhs;
+
+		return lhs = remainder;
+	}
+
+	// AND-assign: stores lhs & rhs into lhs and returns the stored value.
+	RValue<SByte8> operator&=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+	{
+		RValue<SByte8> masked = lhs & rhs;
+
+		return lhs = masked;
+	}
+
+	// OR-assign: stores lhs | rhs into lhs and returns the stored value.
+	RValue<SByte8> operator|=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+	{
+		RValue<SByte8> merged = lhs | rhs;
+
+		return lhs = merged;
+	}
+
+	// XOR-assign: stores lhs ^ rhs into lhs and returns the stored value.
+	RValue<SByte8> operator^=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+	{
+		RValue<SByte8> toggled = lhs ^ rhs;
+
+		return lhs = toggled;
+	}
+
+//	RValue<SByte8> operator<<=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+//	{
+//		return lhs = lhs << rhs;
+//	}
+
+//	RValue<SByte8> operator>>=(const SByte8 &lhs, const RValue<SByte8> &rhs)
+//	{
+//		return lhs = lhs >> rhs;
+//	}
+
+	// Unary plus: returns the operand unchanged.
+	RValue<SByte8> operator+(const RValue<SByte8> &val)
+	{
+		return RValue<SByte8>(val);
+	}
+
+	// Unary minus: negates the operand (Nucleus::createNeg).
+	RValue<SByte8> operator-(const RValue<SByte8> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<SByte8>(negated);
+	}
+
+	// Bitwise complement of the operand (Nucleus::createNot).
+	RValue<SByte8> operator~(const RValue<SByte8> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<SByte8>(inverted);
+	}
+
+	// Saturating add of two SByte8 values; forwards to x86::paddsb.
+	RValue<SByte8> AddSat(const RValue<SByte8> &x, const RValue<SByte8> &y)
+	{
+		RValue<SByte8> saturated = x86::paddsb(x, y);
+
+		return saturated;
+	}
+	
+	// Saturating subtract of two SByte8 values; forwards to x86::psubsb.
+	RValue<SByte8> SubSat(const RValue<SByte8> &x, const RValue<SByte8> &y)
+	{
+		RValue<SByte8> saturated = x86::psubsb(x, y);
+
+		return saturated;
+	}
+
+	// Shuffles x and y with the index pattern 0, 8, 1, 9, 2, 10, 3, 11 and bit-casts
+	// the result to Short4. Assuming createShuffleVector follows LLVM shufflevector
+	// indexing (0-7 select from x, 8-15 from y), this interleaves the low four bytes
+	// of x and y.
+	RValue<Short4> UnpackLow(const RValue<SByte8> &x, const RValue<SByte8> &y)
+	{
+		Constant *shuffle[8];
+
+		for(int i = 0; i < 4; i++)
+		{
+			shuffle[2 * i + 0] = Nucleus::createConstantInt(i);
+			shuffle[2 * i + 1] = Nucleus::createConstantInt(i + 8);
+		}
+
+		Value *interleaved = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
+		Value *reinterpreted = Nucleus::createBitCast(interleaved, Short4::getType());
+
+		return RValue<Short4>(reinterpreted);
+	}
+	
+	// Shuffles x and y with the index pattern 4, 12, 5, 13, 6, 14, 7, 15 and bit-casts
+	// the result to Short4. Assuming createShuffleVector follows LLVM shufflevector
+	// indexing (0-7 select from x, 8-15 from y), this interleaves the high four bytes
+	// of x and y.
+	RValue<Short4> UnpackHigh(const RValue<SByte8> &x, const RValue<SByte8> &y)
+	{
+		Constant *shuffle[8];
+
+		for(int i = 0; i < 4; i++)
+		{
+			shuffle[2 * i + 0] = Nucleus::createConstantInt(i + 4);
+			shuffle[2 * i + 1] = Nucleus::createConstantInt(i + 12);
+		}
+
+		Value *interleaved = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
+		Value *reinterpreted = Nucleus::createBitCast(interleaved, Short4::getType());
+
+		return RValue<Short4>(reinterpreted);
+	}
+
+	// Reinterprets x as Byte8 and returns the result of x86::pmovmskb on it.
+	RValue<Int> SignMask(const RValue<SByte8> &x)
+	{
+		RValue<Byte8> bytes = As<Byte8>(x);
+
+		return x86::pmovmskb(bytes);
+	}
+
+	// Signed greater-than comparison of x and y, delegated to x86::pcmpgtb.
+	RValue<Byte8> CmpGT(const RValue<SByte8> &x, const RValue<SByte8> &y)
+	{
+		RValue<Byte8> mask = x86::pcmpgtb(x, y);
+
+		return mask;
+	}
+	
+	// Equality comparison: both operands are reinterpreted as Byte8 and passed to x86::pcmpeqb.
+	RValue<Byte8> CmpEQ(const RValue<SByte8> &x, const RValue<SByte8> &y)
+	{
+		RValue<Byte8> mask = x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
+
+		return mask;
+	}
+
+	// Returns a pointer to this SByte8 object.
+	SByte8 *SByte8::getThis()
+	{
+		SByte8 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type made of 8 SByte elements.
+	const Type *SByte8::getType()
+	{
+		const Type *element = SByte::getType();
+
+		return VectorType::get(element, 8);
+	}
+
+	// Allocates a stack variable for this Byte16 and stores the given rvalue into it.
+	Byte16::Byte16(const RValue<Byte16> &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: allocates a new stack variable and copies the value loaded from rhs into it.
+	Byte16::Byte16(const Byte16 &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<Byte16> Byte16::operator=(const RValue<Byte16> &rhs) const
+	{
+		Value *incoming = rhs.value;
+		Nucleus::createStore(incoming, address);
+
+		return rhs;
+	}
+
+	// Loads the value of rhs, stores it into this variable and returns the loaded value.
+	RValue<Byte16> Byte16::operator=(const Byte16 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<Byte16>(loaded);
+	}
+
+	// Returns a pointer to this Byte16 object.
+	Byte16 *Byte16::getThis()
+	{
+		Byte16 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type made of 16 Byte elements.
+	const Type *Byte16::getType()
+	{
+		const Type *element = Byte::getType();
+
+		return VectorType::get(element, 16);
+	}
+
+	// Returns a pointer to this SByte16 object.
+	SByte16 *SByte16::getThis()
+	{
+		SByte16 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type made of 16 SByte elements.
+	const Type *SByte16::getType()
+	{
+		const Type *element = SByte::getType();
+
+		return VectorType::get(element, 16);
+	}
+
+	// Builds a Short4 from an Int: zero-extends it to Long, views that as Short4 and applies swizzle 0x00.
+	// NOTE(review): swizzle 0x00 presumably replicates element 0 into all four lanes - confirm against Nucleus::createSwizzle.
+	Short4::Short4(const RValue<Int> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *widened = Nucleus::createZExt(cast.value, Long::getType());
+		Value *asShort4 = Nucleus::createBitCast(widened, Short4::getType());
+		Value *replicated = Nucleus::createSwizzle(asShort4, 0x00);
+
+		Nucleus::createStore(replicated, address);
+	}
+
+	// Constructs a Short4 from an Int4 by gathering one 16-bit element out of each 32-bit lane.
+	// The steps visible here are bitcasts and shuffles only; no arithmetic conversion is emitted in this function.
+	// Two code paths exist: a pshuflw/pshufhw-style pair of shuffles without SSSE3, and a single byte shuffle with SSSE3.
+	Short4::Short4(const RValue<Int4> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		// View the four 32-bit lanes as eight 16-bit elements.
+		Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
+
+		// Disabled alternative: a single 4-element shuffle picking elements 0, 2, 4, 6.
+		#if 0   // FIXME: Check codegen (pshuflw pshufhw pshufd)
+			Constant *pack[8];
+			pack[0] = Nucleus::createConstantInt(0);
+			pack[1] = Nucleus::createConstantInt(2);
+			pack[2] = Nucleus::createConstantInt(4);
+			pack[3] = Nucleus::createConstantInt(6);
+
+			Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
+		#else
+			Value *packed;
+
+			// FIXME: Use Swizzle<Short8>
+			if(!CPUID::supportsSSSE3())
+			{
+				// First shuffle: rearranges the low four 16-bit elements to (0, 2, 0, 2), leaving the high four in place.
+				Constant *pshuflw[8];
+				pshuflw[0] = Nucleus::createConstantInt(0);
+				pshuflw[1] = Nucleus::createConstantInt(2);
+				pshuflw[2] = Nucleus::createConstantInt(0);
+				pshuflw[3] = Nucleus::createConstantInt(2);
+				pshuflw[4] = Nucleus::createConstantInt(4);
+				pshuflw[5] = Nucleus::createConstantInt(5);
+				pshuflw[6] = Nucleus::createConstantInt(6);
+				pshuflw[7] = Nucleus::createConstantInt(7);
+
+				// Second shuffle: leaves the low four in place and rearranges the high four to (4, 6, 4, 6).
+				Constant *pshufhw[8];
+				pshufhw[0] = Nucleus::createConstantInt(0);
+				pshufhw[1] = Nucleus::createConstantInt(1);
+				pshufhw[2] = Nucleus::createConstantInt(2);
+				pshufhw[3] = Nucleus::createConstantInt(3);
+				pshufhw[4] = Nucleus::createConstantInt(4);
+				pshufhw[5] = Nucleus::createConstantInt(6);
+				pshufhw[6] = Nucleus::createConstantInt(4);
+				pshufhw[7] = Nucleus::createConstantInt(6);
+
+				Value *shuffle1 = Nucleus::createShuffleVector(short8, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshuflw, 8));
+				Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshufhw, 8));
+				Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
+				// NOTE(review): swizzle 0x88 presumably selects 32-bit elements 0 and 2 - confirm against Nucleus::createSwizzle.
+				packed = Nucleus::createSwizzle(int4, 0x88);
+			}
+			else
+			{
+				// Byte shuffle selecting bytes 0, 1, 4, 5, 8, 9, 12, 13 (two bytes out of every four), repeated twice.
+				Constant *pshufb[16];
+				pshufb[0] = Nucleus::createConstantInt(0);
+				pshufb[1] = Nucleus::createConstantInt(1);
+				pshufb[2] = Nucleus::createConstantInt(4);
+				pshufb[3] = Nucleus::createConstantInt(5);
+				pshufb[4] = Nucleus::createConstantInt(8);
+				pshufb[5] = Nucleus::createConstantInt(9);
+				pshufb[6] = Nucleus::createConstantInt(12);
+				pshufb[7] = Nucleus::createConstantInt(13);
+				pshufb[8] = Nucleus::createConstantInt(0);
+				pshufb[9] = Nucleus::createConstantInt(1);
+				pshufb[10] = Nucleus::createConstantInt(4);
+				pshufb[11] = Nucleus::createConstantInt(5);
+				pshufb[12] = Nucleus::createConstantInt(8);
+				pshufb[13] = Nucleus::createConstantInt(9);
+				pshufb[14] = Nucleus::createConstantInt(12);
+				pshufb[15] = Nucleus::createConstantInt(13);
+
+				Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
+				packed = Nucleus::createShuffleVector(byte16, UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16));
+			}
+
+			// Reduce the 128-bit shuffled value to the 64-bit Short4 result.
+			#if 0   // FIXME: No optimal instruction selection
+				Value *qword2 = Nucleus::createBitCast(packed, Long2::getType());
+				Value *element = Nucleus::createExtractElement(qword2, 0);
+				Value *short4 = Nucleus::createBitCast(element, Short4::getType());
+			#else   // FIXME: Requires SSE
+				// Goes through the Int2(RValue<Int4>) conversion, then reinterprets the Int2 as Short4.
+				Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
+				Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
+			#endif
+		#endif
+
+		Nucleus::createStore(short4, address);
+	}
+
+//	Short4::Short4(const RValue<Float> &cast)
+//	{
+//	}
+
+	// Builds a Short4 from a Float4: converts to Int4, packs with x86::packssdw and stores the Int2 part reinterpreted as Short4.
+	Short4::Short4(const RValue<Float4> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Int4 lanes = Int4(cast);
+		lanes = As<Int4>(x86::packssdw(lanes, lanes));
+
+		RValue<Short4> narrowed = As<Short4>(Int2(lanes));
+
+		Nucleus::createStore(narrowed.value, address);
+	}
+
+	// Default constructor: allocates a stack variable of Short4 type without storing a value.
+	Short4::Short4()
+	{
+		const Type *storageType = getType();
+
+		address = Nucleus::allocateStackVariable(storageType);
+	}
+
+	// Constructs a Short4 holding the constant elements (x, y, z, w).
+	Short4::Short4(short x, short y, short z, short w)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Constant *constantVector[4];
+		constantVector[0] = Nucleus::createConstantShort(x);
+		constantVector[1] = Nucleus::createConstantShort(y);
+		constantVector[2] = Nucleus::createConstantShort(z);
+		constantVector[3] = Nucleus::createConstantShort(w);
+
+		// The constant vector is built once and stored; an earlier duplicate call whose result was discarded has been removed.
+		Nucleus::createStore(Nucleus::createConstantVector(constantVector, 4), address);
+	}
+
+	// Allocates a stack variable for this Short4 and stores the given rvalue into it.
+	Short4::Short4(const RValue<Short4> &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: allocates a new stack variable and copies the value loaded from rhs into it.
+	Short4::Short4(const Short4 &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Constructs a Short4 from a UShort4 rvalue; the underlying value is stored as-is.
+	Short4::Short4(const RValue<UShort4> &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Constructs a Short4 from a UShort4 variable; the loaded value is stored as-is.
+	Short4::Short4(const UShort4 &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<Short4> Short4::operator=(const RValue<Short4> &rhs) const
+	{
+		Value *incoming = rhs.value;
+		Nucleus::createStore(incoming, address);
+
+		return rhs;
+	}
+
+	// Loads the value of rhs, stores it into this variable and returns the loaded value.
+	RValue<Short4> Short4::operator=(const Short4 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<Short4>(loaded);
+	}
+
+	// Stores a UShort4 rvalue into this Short4 variable as-is and returns rhs.
+	RValue<Short4> Short4::operator=(const RValue<UShort4> &rhs) const
+	{
+		Value *incoming = rhs.value;
+		Nucleus::createStore(incoming, address);
+
+		return rhs;
+	}
+
+	// Loads the value of a UShort4 variable, stores it into this Short4 and returns the loaded value.
+	RValue<Short4> Short4::operator=(const UShort4 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<Short4>(loaded);
+	}
+
+	// Element-wise addition, emitted through Nucleus::createAdd.
+	RValue<Short4> operator+(const RValue<Short4> &lhs, const RValue<Short4> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<Short4>(sum);
+	}
+
+	// Element-wise subtraction, emitted through Nucleus::createSub.
+	RValue<Short4> operator-(const RValue<Short4> &lhs, const RValue<Short4> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<Short4>(difference);
+	}
+
+	// Element-wise multiplication, emitted through Nucleus::createMul.
+	RValue<Short4> operator*(const RValue<Short4> &lhs, const RValue<Short4> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<Short4>(product);
+	}
+
+	// Signed division, emitted through Nucleus::createSDiv.
+	RValue<Short4> operator/(const RValue<Short4> &lhs, const RValue<Short4> &rhs)
+	{
+		Value *quotient = Nucleus::createSDiv(lhs.value, rhs.value);
+
+		return RValue<Short4>(quotient);
+	}
+
+	// Signed remainder, emitted through Nucleus::createSRem.
+	RValue<Short4> operator%(const RValue<Short4> &lhs, const RValue<Short4> &rhs)
+	{
+		Value *remainder = Nucleus::createSRem(lhs.value, rhs.value);
+
+		return RValue<Short4>(remainder);
+	}
+
+	// Bitwise AND, emitted through Nucleus::createAnd.
+	RValue<Short4> operator&(const RValue<Short4> &lhs, const RValue<Short4> &rhs)
+	{
+		Value *conjunction = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<Short4>(conjunction);
+	}
+
+	// Bitwise OR, emitted through Nucleus::createOr.
+	RValue<Short4> operator|(const RValue<Short4> &lhs, const RValue<Short4> &rhs)
+	{
+		Value *disjunction = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<Short4>(disjunction);
+	}
+
+	// Bitwise XOR, emitted through Nucleus::createXor.
+	RValue<Short4> operator^(const RValue<Short4> &lhs, const RValue<Short4> &rhs)
+	{
+		Value *exclusive = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<Short4>(exclusive);
+	}
+
+	// Left shift by an immediate count, issued via x86::psllw instead of a generic Nucleus shift.
+	RValue<Short4> operator<<(const RValue<Short4> &lhs, unsigned char rhs)
+	{
+		RValue<Short4> shifted = x86::psllw(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Right shift by an immediate count, issued via x86::psraw instead of a generic Nucleus shift.
+	RValue<Short4> operator>>(const RValue<Short4> &lhs, unsigned char rhs)
+	{
+		RValue<Short4> shifted = x86::psraw(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Left shift by a Long1 count, issued via x86::psllw instead of a generic Nucleus shift.
+	RValue<Short4> operator<<(const RValue<Short4> &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Short4> shifted = x86::psllw(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Right shift by a Long1 count, issued via x86::psraw instead of a generic Nucleus shift.
+	RValue<Short4> operator>>(const RValue<Short4> &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Short4> shifted = x86::psraw(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Computes lhs + rhs, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator+=(const Short4 &lhs, const RValue<Short4> &rhs)
+	{
+		RValue<Short4> result = lhs + rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs - rhs, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator-=(const Short4 &lhs, const RValue<Short4> &rhs)
+	{
+		RValue<Short4> result = lhs - rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs * rhs, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator*=(const Short4 &lhs, const RValue<Short4> &rhs)
+	{
+		RValue<Short4> result = lhs * rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs / rhs, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator/=(const Short4 &lhs, const RValue<Short4> &rhs)
+	{
+		RValue<Short4> result = lhs / rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs % rhs, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator%=(const Short4 &lhs, const RValue<Short4> &rhs)
+	{
+		RValue<Short4> result = lhs % rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs & rhs, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator&=(const Short4 &lhs, const RValue<Short4> &rhs)
+	{
+		RValue<Short4> result = lhs & rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs | rhs, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator|=(const Short4 &lhs, const RValue<Short4> &rhs)
+	{
+		RValue<Short4> result = lhs | rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs ^ rhs, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator^=(const Short4 &lhs, const RValue<Short4> &rhs)
+	{
+		RValue<Short4> result = lhs ^ rhs;
+
+		return lhs = result;
+	}
+
+	// Shifts lhs left by an immediate count, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator<<=(const Short4 &lhs, unsigned char rhs)
+	{
+		RValue<Short4> shifted = lhs << rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shifts lhs right by an immediate count, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator>>=(const Short4 &lhs, unsigned char rhs)
+	{
+		RValue<Short4> shifted = lhs >> rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shifts lhs left by a Long1 count, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator<<=(const Short4 &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Short4> shifted = lhs << rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shifts lhs right by a Long1 count, assigns the result to lhs and returns the assigned value.
+	RValue<Short4> operator>>=(const Short4 &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Short4> shifted = lhs >> rhs;
+
+		return lhs = shifted;
+	}
+
+	// Unary plus: yields the operand unchanged.
+	RValue<Short4> operator+(const RValue<Short4> &val)
+	{
+		return RValue<Short4>(val);
+	}
+
+	// Unary minus: emits a negation of the operand through Nucleus::createNeg.
+	RValue<Short4> operator-(const RValue<Short4> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<Short4>(negated);
+	}
+
+	// Bitwise complement: emits a NOT of the operand through Nucleus::createNot.
+	RValue<Short4> operator~(const RValue<Short4> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<Short4>(inverted);
+	}
+
+	// Converts a Float4 to Short4 using x86::cvtps2dq followed by x86::packssdw, returning the Int2 part viewed as Short4.
+	RValue<Short4> RoundShort4(const RValue<Float4> &cast)
+	{
+		RValue<Int4> converted = x86::cvtps2dq(cast);
+		RValue<Int4> packed = As<Int4>(x86::packssdw(converted, converted));
+
+		return As<Short4>(Int2(packed));
+	}
+
+	// Signed maximum of x and y, delegated to x86::pmaxsw.
+	RValue<Short4> Max(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		RValue<Short4> larger = x86::pmaxsw(x, y);
+
+		return larger;
+	}
+
+	// Signed minimum of x and y, delegated to x86::pminsw.
+	RValue<Short4> Min(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		RValue<Short4> smaller = x86::pminsw(x, y);
+
+		return smaller;
+	}
+
+	// Saturating add of two Short4 values, delegated to x86::paddsw.
+	RValue<Short4> AddSat(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		RValue<Short4> sum = x86::paddsw(x, y);
+
+		return sum;
+	}
+
+	// Saturating subtract of two Short4 values, delegated to x86::psubsw.
+	RValue<Short4> SubSat(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		RValue<Short4> difference = x86::psubsw(x, y);
+
+		return difference;
+	}
+
+	// High half of the signed products of x and y, delegated to x86::pmulhw.
+	RValue<Short4> MulHigh(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		RValue<Short4> high = x86::pmulhw(x, y);
+
+		return high;
+	}
+
+	// Multiply-add of x and y producing an Int2, delegated to x86::pmaddwd.
+	RValue<Int2> MulAdd(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		RValue<Int2> accumulated = x86::pmaddwd(x, y);
+
+		return accumulated;
+	}
+
+	// Packs two Short4 values into one SByte8, delegated to x86::packsswb.
+	RValue<SByte8> Pack(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		RValue<SByte8> packed = x86::packsswb(x, y);
+
+		return packed;
+	}
+
+	// Shuffles x and y with the index pattern 0, 4, 1, 5 and reinterprets the result as Int2.
+	// NOTE(review): assuming LLVM shufflevector numbering, indices 4 and 5 pick elements 0 and 1 of y.
+	RValue<Int2> UnpackLow(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		Constant *shuffle[4];
+
+		for(int i = 0; i < 2; i++)
+		{
+			shuffle[2 * i + 0] = Nucleus::createConstantInt(i);
+			shuffle[2 * i + 1] = Nucleus::createConstantInt(i + 4);
+		}
+
+		Constant *mask = Nucleus::createConstantVector(shuffle, 4);
+		Value *interleaved = Nucleus::createShuffleVector(x.value, y.value, mask);
+
+		return RValue<Int2>(Nucleus::createBitCast(interleaved, Int2::getType()));
+	}
+
+	// Shuffles x and y with the index pattern 2, 6, 3, 7 and reinterprets the result as Int2.
+	// NOTE(review): assuming LLVM shufflevector numbering, indices 6 and 7 pick elements 2 and 3 of y.
+	RValue<Int2> UnpackHigh(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		Constant *shuffle[4];
+
+		for(int i = 0; i < 2; i++)
+		{
+			shuffle[2 * i + 0] = Nucleus::createConstantInt(i + 2);
+			shuffle[2 * i + 1] = Nucleus::createConstantInt(i + 6);
+		}
+
+		Constant *mask = Nucleus::createConstantVector(shuffle, 4);
+		Value *interleaved = Nucleus::createShuffleVector(x.value, y.value, mask);
+
+		return RValue<Int2>(Nucleus::createBitCast(interleaved, Int2::getType()));
+	}
+
+	// Rearranges the elements of x according to the select code, via Nucleus::createSwizzle.
+	RValue<Short4> Swizzle(const RValue<Short4> &x, unsigned char select)
+	{
+		Value *swizzled = Nucleus::createSwizzle(x.value, select);
+
+		return RValue<Short4>(swizzled);
+	}
+
+	// Returns val with the element at index i replaced by element, via Nucleus::createInsertElement.
+	RValue<Short4> Insert(const RValue<Short4> &val, const RValue<Short> &element, int i)
+	{
+		Value *updated = Nucleus::createInsertElement(val.value, element.value, i);
+
+		return RValue<Short4>(updated);
+	}
+
+	// Returns the element of val at index i, via Nucleus::createExtractElement.
+	RValue<Short> Extract(const RValue<Short4> &val, int i)
+	{
+		Value *element = Nucleus::createExtractElement(val.value, i);
+
+		return RValue<Short>(element);
+	}
+
+	// Signed greater-than comparison of x and y, delegated to x86::pcmpgtw.
+	RValue<Short4> CmpGT(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		RValue<Short4> mask = x86::pcmpgtw(x, y);
+
+		return mask;
+	}
+
+	// Equality comparison of x and y, delegated to x86::pcmpeqw.
+	RValue<Short4> CmpEQ(const RValue<Short4> &x, const RValue<Short4> &y)
+	{
+		RValue<Short4> mask = x86::pcmpeqw(x, y);
+
+		return mask;
+	}
+
+	// Returns a pointer to this Short4 object.
+	Short4 *Short4::getThis()
+	{
+		Short4 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type made of 4 Short elements.
+	const Type *Short4::getType()
+	{
+		const Type *element = Short::getType();
+
+		return VectorType::get(element, 4);
+	}
+
+	// Constructs a UShort4 from an Int4 by going through the Short4(Int4) conversion and assigning the result.
+	UShort4::UShort4(const RValue<Int4> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Short4 narrowed(cast);
+
+		*this = narrowed;
+	}
+
+	// Constructs a UShort4 from a Float4. When saturate is set, the input is first clamped against 0xFFFF
+	// (and against 0x0000 unless the SSE4.1 packusdw path is taken); otherwise it is converted as-is.
+	UShort4::UShort4(const RValue<Float4> &cast, bool saturate)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		// The packusdw path is used only when saturation was requested and SSE4.1 is available.
+		const bool packUnsigned = saturate && CPUID::supportsSSE4_1();
+
+		Float4 sat;
+
+		if(!saturate)
+		{
+			sat = cast;
+		}
+		else if(packUnsigned)
+		{
+			sat = Min(cast, Float4(0xFFFF));   // packusdw takes care of 0x0000 saturation
+		}
+		else
+		{
+			sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000));
+		}
+
+		Int4 int4(sat);
+
+		if(packUnsigned)
+		{
+			*this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4)))));
+		}
+		else
+		{
+			*this = Short4(Int4(int4));
+		}
+	}
+
+	// Default constructor: allocates a stack variable of UShort4 type without storing a value.
+	UShort4::UShort4()
+	{
+		const Type *storageType = getType();
+
+		address = Nucleus::allocateStackVariable(storageType);
+	}
+
+	// Constructs a UShort4 holding the constant elements (x, y, z, w).
+	UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		const unsigned short lanes[4] = {x, y, z, w};
+		Constant *constantVector[4];
+
+		for(int i = 0; i < 4; i++)
+		{
+			constantVector[i] = Nucleus::createConstantShort(lanes[i]);
+		}
+
+		Nucleus::createStore(Nucleus::createConstantVector(constantVector, 4), address);
+	}
+
+	// Allocates a stack variable for this UShort4 and stores the given rvalue into it.
+	UShort4::UShort4(const RValue<UShort4> &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: allocates a new stack variable and copies the value loaded from rhs into it.
+	UShort4::UShort4(const UShort4 &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Constructs a UShort4 from a Short4 rvalue; the underlying value is stored as-is.
+	UShort4::UShort4(const RValue<Short4> &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Constructs a UShort4 from a Short4 variable; the loaded value is stored as-is.
+	UShort4::UShort4(const Short4 &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<UShort4> UShort4::operator=(const RValue<UShort4> &rhs) const
+	{
+		Value *incoming = rhs.value;
+		Nucleus::createStore(incoming, address);
+
+		return rhs;
+	}
+
+	// Loads the value of rhs, stores it into this variable and returns the loaded value.
+	RValue<UShort4> UShort4::operator=(const UShort4 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<UShort4>(loaded);
+	}
+
+	// Stores a Short4 rvalue into this UShort4 variable as-is and returns rhs.
+	RValue<UShort4> UShort4::operator=(const RValue<Short4> &rhs) const
+	{
+		Value *incoming = rhs.value;
+		Nucleus::createStore(incoming, address);
+
+		return rhs;
+	}
+
+	// Loads the value of a Short4 variable, stores it into this UShort4 and returns the loaded value.
+	RValue<UShort4> UShort4::operator=(const Short4 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<UShort4>(loaded);
+	}
+
+	// Element-wise addition, emitted through Nucleus::createAdd.
+	RValue<UShort4> operator+(const RValue<UShort4> &lhs, const RValue<UShort4> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<UShort4>(sum);
+	}
+
+	// Element-wise subtraction, emitted through Nucleus::createSub.
+	RValue<UShort4> operator-(const RValue<UShort4> &lhs, const RValue<UShort4> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<UShort4>(difference);
+	}
+
+	// Element-wise multiplication, emitted through Nucleus::createMul.
+	RValue<UShort4> operator*(const RValue<UShort4> &lhs, const RValue<UShort4> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<UShort4>(product);
+	}
+
+	// Left shift by an immediate count: lhs is viewed as Short4, shifted with x86::psllw and viewed back as UShort4.
+	RValue<UShort4> operator<<(const RValue<UShort4> &lhs, unsigned char rhs)
+	{
+		RValue<Short4> shifted = x86::psllw(As<Short4>(lhs), rhs);
+
+		return As<UShort4>(shifted);
+	}
+
+	// Right shift by an immediate count, issued via x86::psrlw instead of a generic Nucleus shift.
+	RValue<UShort4> operator>>(const RValue<UShort4> &lhs, unsigned char rhs)
+	{
+		RValue<UShort4> shifted = x86::psrlw(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Left shift by a Long1 count: lhs is viewed as Short4, shifted with x86::psllw and viewed back as UShort4.
+	RValue<UShort4> operator<<(const RValue<UShort4> &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Short4> shifted = x86::psllw(As<Short4>(lhs), rhs);
+
+		return As<UShort4>(shifted);
+	}
+
+	// Right shift by a Long1 count, issued via x86::psrlw instead of a generic Nucleus shift.
+	RValue<UShort4> operator>>(const RValue<UShort4> &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<UShort4> shifted = x86::psrlw(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Shifts lhs left by an immediate count, assigns the result to lhs and returns the assigned value.
+	RValue<UShort4> operator<<=(const UShort4 &lhs, unsigned char rhs)
+	{
+		RValue<UShort4> shifted = lhs << rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shifts lhs right by an immediate count, assigns the result to lhs and returns the assigned value.
+	RValue<UShort4> operator>>=(const UShort4 &lhs, unsigned char rhs)
+	{
+		RValue<UShort4> shifted = lhs >> rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shifts lhs left by a Long1 count, assigns the result to lhs and returns the assigned value.
+	RValue<UShort4> operator<<=(const UShort4 &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<UShort4> shifted = lhs << rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shifts lhs right by a Long1 count, assigns the result to lhs and returns the assigned value.
+	RValue<UShort4> operator>>=(const UShort4 &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<UShort4> shifted = lhs >> rhs;
+
+		return lhs = shifted;
+	}
+
+	// Bitwise complement: emits a NOT of the operand through Nucleus::createNot.
+	RValue<UShort4> operator~(const RValue<UShort4> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<UShort4>(inverted);
+	}
+
+	// Unsigned maximum built on the signed Short4 Max: both operands are biased by 0x8000 so that
+	// unsigned ordering matches signed ordering, the signed maximum is taken, and the bias is added back.
+	RValue<UShort4> Max(const RValue<UShort4> &x, const RValue<UShort4> &y)
+	{
+		// 0x8000 does not fit in a signed short; the cast makes the intended 16-bit wrap-around explicit.
+		const short bias = static_cast<short>(0x8000);
+
+		return Max(As<Short4>(x) - Short4(bias, bias, bias, bias), As<Short4>(y) - Short4(bias, bias, bias, bias)) + Short4(bias, bias, bias, bias);
+	}
+
+	// Unsigned minimum built on the signed Short4 Min: both operands are biased by 0x8000 so that
+	// unsigned ordering matches signed ordering, the signed minimum is taken, and the bias is added back.
+	RValue<UShort4> Min(const RValue<UShort4> &x, const RValue<UShort4> &y)
+	{
+		// 0x8000 does not fit in a signed short; the cast makes the intended 16-bit wrap-around explicit.
+		const short bias = static_cast<short>(0x8000);
+
+		return Min(As<Short4>(x) - Short4(bias, bias, bias, bias), As<Short4>(y) - Short4(bias, bias, bias, bias)) + Short4(bias, bias, bias, bias);
+	}
+
+	// Unsigned saturating add of two UShort4 values, delegated to x86::paddusw.
+	RValue<UShort4> AddSat(const RValue<UShort4> &x, const RValue<UShort4> &y)
+	{
+		RValue<UShort4> sum = x86::paddusw(x, y);
+
+		return sum;
+	}
+
+	// Unsigned saturating subtract of two UShort4 values, delegated to x86::psubusw.
+	RValue<UShort4> SubSat(const RValue<UShort4> &x, const RValue<UShort4> &y)
+	{
+		RValue<UShort4> difference = x86::psubusw(x, y);
+
+		return difference;
+	}
+
+	// High half of the unsigned products of x and y, delegated to x86::pmulhuw.
+	RValue<UShort4> MulHigh(const RValue<UShort4> &x, const RValue<UShort4> &y)
+	{
+		RValue<UShort4> high = x86::pmulhuw(x, y);
+
+		return high;
+	}
+
+	// Average of x and y, delegated to x86::pavgw.
+	RValue<UShort4> Average(const RValue<UShort4> &x, const RValue<UShort4> &y)
+	{
+		RValue<UShort4> mean = x86::pavgw(x, y);
+
+		return mean;
+	}
+
+	// Packs two UShort4 values into one Byte8, delegated to x86::packuswb.
+	RValue<Byte8> Pack(const RValue<UShort4> &x, const RValue<UShort4> &y)
+	{
+		RValue<Byte8> packed = x86::packuswb(x, y);
+
+		return packed;
+	}
+
+	// Returns a pointer to this UShort4 object.
+	UShort4 *UShort4::getThis()
+	{
+		UShort4 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type made of 4 UShort elements.
+	const Type *UShort4::getType()
+	{
+		const Type *element = UShort::getType();
+
+		return VectorType::get(element, 4);
+	}
+
+	// Constructs a Short8 holding the constant elements (c0, ..., c7).
+	Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		const short lanes[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
+		Constant *constantVector[8];
+
+		for(int i = 0; i < 8; i++)
+		{
+			constantVector[i] = Nucleus::createConstantShort(lanes[i]);
+		}
+
+		Nucleus::createStore(Nucleus::createConstantVector(constantVector, 8), address);
+	}
+
+	// Allocates a stack variable for this Short8 and stores the given rvalue into it.
+	Short8::Short8(const RValue<Short8> &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Element-wise addition, emitted through Nucleus::createAdd.
+	RValue<Short8> operator+(const RValue<Short8> &lhs, const RValue<Short8> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<Short8>(sum);
+	}
+
+	// Bitwise AND, emitted through Nucleus::createAnd.
+	RValue<Short8> operator&(const RValue<Short8> &lhs, const RValue<Short8> &rhs)
+	{
+		Value *conjunction = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<Short8>(conjunction);
+	}
+
+	// Left shift by an immediate count, delegated to x86::psllw.
+	RValue<Short8> operator<<(const RValue<Short8> &lhs, unsigned char rhs)
+	{
+		// FIXME: Fallback required
+		RValue<Short8> shifted = x86::psllw(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Right shift by an immediate count, delegated to x86::psraw.
+	RValue<Short8> operator>>(const RValue<Short8> &lhs, unsigned char rhs)
+	{
+		// FIXME: Fallback required
+		RValue<Short8> shifted = x86::psraw(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Joins two Short4 values into one Short8: each half is viewed as a Long, inserted into
+	// elements 0 (lo) and 1 (hi) of a Long2, and the Long2 is then viewed as Short8.
+	RValue<Short8> Concatenate(const RValue<Short4> &lo, const RValue<Short4> &hi)
+	{
+		Value *lowerQuad = Nucleus::createBitCast(lo.value, Long::getType());
+		Value *upperQuad = Nucleus::createBitCast(hi.value, Long::getType());
+
+		Value *joined = UndefValue::get(Long2::getType());
+		joined = Nucleus::createInsertElement(joined, lowerQuad, 0);
+		joined = Nucleus::createInsertElement(joined, upperQuad, 1);
+
+		return RValue<Short8>(Nucleus::createBitCast(joined, Short8::getType()));
+	}
+
+	// Multiply-add of x and y producing an Int4, delegated to x86::pmaddwd.
+	RValue<Int4> MulAdd(const RValue<Short8> &x, const RValue<Short8> &y)
+	{
+		// FIXME: Fallback required
+		RValue<Int4> accumulated = x86::pmaddwd(x, y);
+
+		return accumulated;
+	}
+
+	// High half of the signed products of x and y, delegated to x86::pmulhw.
+	RValue<Short8> MulHigh(const RValue<Short8> &x, const RValue<Short8> &y)
+	{
+		// FIXME: Fallback required
+		RValue<Short8> high = x86::pmulhw(x, y);
+
+		return high;
+	}
+
+	// Returns a pointer to this Short8 object.
+	Short8 *Short8::getThis()
+	{
+		Short8 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type made of 8 Short elements.
+	const Type *Short8::getType()
+	{
+		const Type *element = Short::getType();
+
+		return VectorType::get(element, 8);
+	}
+
+	// Constructs a UShort8 holding the constant elements (c0, ..., c7).
+	UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		const unsigned short lanes[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
+		Constant *constantVector[8];
+
+		for(int i = 0; i < 8; i++)
+		{
+			constantVector[i] = Nucleus::createConstantShort(lanes[i]);
+		}
+
+		Nucleus::createStore(Nucleus::createConstantVector(constantVector, 8), address);
+	}
+
+	// Allocates a stack variable for this UShort8 and stores the given rvalue into it.
+	UShort8::UShort8(const RValue<UShort8> &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<UShort8> UShort8::operator=(const RValue<UShort8> &rhs) const
+	{
+		Value *incoming = rhs.value;
+		Nucleus::createStore(incoming, address);
+
+		return rhs;
+	}
+
+	// Loads the value of rhs, stores it into this variable and returns the loaded value.
+	RValue<UShort8> UShort8::operator=(const UShort8 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<UShort8>(loaded);
+	}
+
+	// Bitwise AND, emitted through Nucleus::createAnd.
+	RValue<UShort8> operator&(const RValue<UShort8> &lhs, const RValue<UShort8> &rhs)
+	{
+		Value *conjunction = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<UShort8>(conjunction);
+	}
+
+	// Left shift by an immediate count: lhs is viewed as Short8, shifted with x86::psllw and viewed back as UShort8.
+	RValue<UShort8> operator<<(const RValue<UShort8> &lhs, unsigned char rhs)
+	{
+		// FIXME: Fallback required
+		RValue<Short8> shifted = x86::psllw(As<Short8>(lhs), rhs);
+
+		return As<UShort8>(shifted);
+	}
+
+	// Right shift by an immediate count, delegated to x86::psrlw.
+	RValue<UShort8> operator>>(const RValue<UShort8> &lhs, unsigned char rhs)
+	{
+		// FIXME: Fallback required
+		RValue<UShort8> shifted = x86::psrlw(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Element-wise addition, emitted through Nucleus::createAdd.
+	RValue<UShort8> operator+(const RValue<UShort8> &lhs, const RValue<UShort8> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<UShort8>(sum);
+	}
+
+	// Element-wise multiplication, emitted through Nucleus::createMul.
+	RValue<UShort8> operator*(const RValue<UShort8> &lhs, const RValue<UShort8> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<UShort8>(product);
+	}
+
+	// Computes lhs + rhs, assigns the result to lhs and returns the assigned value.
+	RValue<UShort8> operator+=(const UShort8 &lhs, const RValue<UShort8> &rhs)
+	{
+		RValue<UShort8> result = lhs + rhs;
+
+		return lhs = result;
+	}
+
+	// Bitwise complement: emits a NOT of the operand through Nucleus::createNot.
+	RValue<UShort8> operator~(const RValue<UShort8> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<UShort8>(inverted);
+	}
+
+	// Rearranges x through a 16-entry byte shuffle: output element k takes bytes (selectK + 0) and (selectK + 1) of x viewed as Byte16.
+	// NOTE(review): the select values are used directly as byte indices, so callers presumably pass byte offsets - confirm at call sites.
+	RValue<UShort8> Swizzle(const RValue<UShort8> &x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
+	{
+		const char selects[8] = {select0, select1, select2, select3, select4, select5, select6, select7};
+		Constant *pshufb[16];
+
+		for(int k = 0; k < 8; k++)
+		{
+			pshufb[2 * k + 0] = Nucleus::createConstantInt(selects[k] + 0);
+			pshufb[2 * k + 1] = Nucleus::createConstantInt(selects[k] + 1);
+		}
+
+		Value *bytes = Nucleus::createBitCast(x.value, Byte16::getType());
+		Value *rearranged = Nucleus::createShuffleVector(bytes, UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16));
+
+		return RValue<UShort8>(Nucleus::createBitCast(rearranged, UShort8::getType()));
+	}
+
+	// Joins two UShort4 values into one UShort8: each half is viewed as a Long, inserted into
+	// elements 0 (lo) and 1 (hi) of a Long2, and the Long2 is then viewed as UShort8.
+	RValue<UShort8> Concatenate(const RValue<UShort4> &lo, const RValue<UShort4> &hi)
+	{
+		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
+		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
+
+		Value *long2 = UndefValue::get(Long2::getType());
+		long2 = Nucleus::createInsertElement(long2, loLong, 0);
+		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
+
+		// Cast to the type that is actually returned (UShort8), as the UShort8 Swizzle in this file does, rather than Short8.
+		Value *ushort8 = Nucleus::createBitCast(long2, UShort8::getType());
+
+		return RValue<UShort8>(ushort8);
+	}
+
+	// High half of the unsigned products of x and y, delegated to x86::pmulhuw.
+	RValue<UShort8> MulHigh(const RValue<UShort8> &x, const RValue<UShort8> &y)
+	{
+		// FIXME: Fallback required
+		RValue<UShort8> high = x86::pmulhuw(x, y);
+
+		return high;
+	}
+
+	// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
+//	RValue<UShort8> PackRepeat(const RValue<Byte16> &x, const RValue<Byte16> &y, int element)
+//	{
+//		Constant *pshufb[16];
+//		pshufb[0] = Nucleus::createConstantInt(element + 0);
+//		pshufb[1] = Nucleus::createConstantInt(element + 0);
+//		pshufb[2] = Nucleus::createConstantInt(element + 4);
+//		pshufb[3] = Nucleus::createConstantInt(element + 4);
+//		pshufb[4] = Nucleus::createConstantInt(element + 8);
+//		pshufb[5] = Nucleus::createConstantInt(element + 8);
+//		pshufb[6] = Nucleus::createConstantInt(element + 12);
+//		pshufb[7] = Nucleus::createConstantInt(element + 12);
+//		pshufb[8] = Nucleus::createConstantInt(element + 16);
+//		pshufb[9] = Nucleus::createConstantInt(element + 16);
+//		pshufb[10] = Nucleus::createConstantInt(element + 20);
+//		pshufb[11] = Nucleus::createConstantInt(element + 20);
+//		pshufb[12] = Nucleus::createConstantInt(element + 24);
+//		pshufb[13] = Nucleus::createConstantInt(element + 24);
+//		pshufb[14] = Nucleus::createConstantInt(element + 28);
+//		pshufb[15] = Nucleus::createConstantInt(element + 28);
+//
+//		Value *shuffle = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(pshufb, 16));
+//		Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
+//
+//		return RValue<UShort8>(short8);
+//	}
+
+	// Returns a pointer to this UShort8 object.
+	UShort8 *UShort8::getThis()
+	{
+		UShort8 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type made of 8 UShort elements.
+	const Type *UShort8::getType()
+	{
+		const Type *element = UShort::getType();
+
+		return VectorType::get(element, 8);
+	}
+
+	// Constructs an Int variable initialized from a function argument.
+	Int::Int(Argument *argument)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(argument, address);
+	}
+
+	// Constructs an Int from a Byte by zero extension.
+	Int::Int(const RValue<Byte> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createZExt(cast.value, Int::getType()), address);
+	}
+
+	// Constructs an Int from an SByte by sign extension.
+	Int::Int(const RValue<SByte> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createSExt(cast.value, Int::getType()), address);
+	}
+
+	// Constructs an Int from a Short by sign extension.
+	Int::Int(const RValue<Short> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createSExt(cast.value, Int::getType()), address);
+	}
+
+	// Constructs an Int from a UShort by zero extension.
+	Int::Int(const RValue<UShort> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createZExt(cast.value, Int::getType()), address);
+	}
+
+	// Constructs an Int from element 0 of an Int2.
+	Int::Int(const RValue<Int2> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		this->operator=(Extract(cast, 0));
+	}
+
+	// Constructs an Int from a Long by truncation.
+	Int::Int(const RValue<Long> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createTrunc(cast.value, Int::getType()), address);
+	}
+
+	// Constructs an Int from a Float using a float-to-signed-integer conversion (Nucleus::createFPToSI).
+	Int::Int(const RValue<Float> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createFPToSI(cast.value, Int::getType()), address);
+	}
+
+	// Default constructor: allocates a stack variable of Int type without storing a value.
+	Int::Int()
+	{
+		const Type *storageType = getType();
+
+		address = Nucleus::allocateStackVariable(storageType);
+	}
+
+	// Constructs an Int variable initialized with the constant x.
+	Int::Int(int x)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(Nucleus::createConstantInt(x), address);
+	}
+
+	// Allocates a stack variable for this Int and stores the given rvalue into it.
+	Int::Int(const RValue<Int> &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Constructs an Int from a UInt rvalue; the underlying value is stored as-is.
+	Int::Int(const RValue<UInt> &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: allocates a new stack variable and copies the value loaded from rhs into it.
+	Int::Int(const Int &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Constructs an Int from a UInt variable; the loaded value is stored as-is.
+	Int::Int(const UInt &rhs)
+	{
+		const Type *storageType = getType();
+		address = Nucleus::allocateStackVariable(storageType);
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores the integer constant rhs into this variable and returns that constant as an rvalue.
+	RValue<Int> Int::operator=(int rhs) const
+	{
+		// Return the stored constant itself, as the other assignment operators in this file return the stored value,
+		// so the result does not depend on what Nucleus::createStore() happens to return.
+		Value *constant = Nucleus::createConstantInt(rhs);
+		Nucleus::createStore(constant, address);
+
+		return RValue<Int>(constant);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<Int> Int::operator=(const RValue<Int> &rhs) const
+	{
+		Value *incoming = rhs.value;
+		Nucleus::createStore(incoming, address);
+
+		return rhs;
+	}
+
+	// Stores a UInt value into this variable unchanged and returns rhs.
+	RValue<Int> Int::operator=(const RValue<UInt> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Copies the current value of rhs into this variable and returns the loaded value.
+	RValue<Int> Int::operator=(const Int &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+		return RValue<Int>(loaded);
+	}
+
+	// Copies the current value of a UInt variable into this variable and returns it as an Int.
+	RValue<Int> Int::operator=(const UInt &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+		return RValue<Int>(loaded);
+	}
+
+	// Address-of: wraps this variable's address as a Pointer<Int> value.
+	RValue<Pointer<Int>> Int::operator&()
+	{
+		Value *location = address;
+
+		return RValue<Pointer<Int>>(location);
+	}
+
+	// Int addition.
+	RValue<Int> operator+(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<Int>(sum);
+	}
+
+	// Int subtraction.
+	RValue<Int> operator-(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<Int>(difference);
+	}
+
+	// Int multiplication.
+	RValue<Int> operator*(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<Int>(product);
+	}
+
+	// Int division (signed).
+	RValue<Int> operator/(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *quotient = Nucleus::createSDiv(lhs.value, rhs.value);
+
+		return RValue<Int>(quotient);
+	}
+
+	// Int remainder (signed).
+	RValue<Int> operator%(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *remainder = Nucleus::createSRem(lhs.value, rhs.value);
+
+		return RValue<Int>(remainder);
+	}
+
+	// Int bitwise AND.
+	RValue<Int> operator&(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<Int>(masked);
+	}
+
+	// Int bitwise OR.
+	RValue<Int> operator|(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<Int>(merged);
+	}
+
+	// Int bitwise XOR.
+	RValue<Int> operator^(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<Int>(toggled);
+	}
+
+	// Int left shift.
+	RValue<Int> operator<<(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *shifted = Nucleus::createShl(lhs.value, rhs.value);
+
+		return RValue<Int>(shifted);
+	}
+
+	// Int right shift (arithmetic).
+	RValue<Int> operator>>(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *shifted = Nucleus::createAShr(lhs.value, rhs.value);
+
+		return RValue<Int>(shifted);
+	}
+
+	// Computes lhs + rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator+=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs + rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs - rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator-=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs - rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs * rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator*=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs * rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs / rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator/=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs / rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs % rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator%=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs % rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs & rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator&=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs & rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs | rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator|=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs | rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs ^ rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator^=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs ^ rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs << rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator<<=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs << rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs >> rhs, stores it back into lhs and returns the result.
+	RValue<Int> operator>>=(const Int &lhs, const RValue<Int> &rhs)
+	{
+		RValue<Int> result = lhs >> rhs;
+
+		return lhs = result;
+	}
+
+	// Unary plus: returns the operand unchanged.
+	RValue<Int> operator+(const RValue<Int> &val)
+	{
+		const RValue<Int> &unchanged = val;
+
+		return unchanged;
+	}
+
+	// Unary minus: negates the operand.
+	RValue<Int> operator-(const RValue<Int> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<Int>(negated);
+	}
+
+	// Bitwise complement of the operand.
+	RValue<Int> operator~(const RValue<Int> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<Int>(inverted);
+	}
+
+	// Post-increment: stores val + 1 back into val and returns the value read before the store.
+	RValue<Int> operator++(const Int &val, int)   // Post-increment
+	{
+		RValue<Int> previous = val;
+
+		Value *one = Nucleus::createConstantInt(1);
+		Nucleus::createStore(Nucleus::createAdd(previous.value, one), val.address);
+
+		return previous;
+	}
+
+	// Pre-increment: loads val, adds 1, stores it back and returns the variable itself.
+	const Int &operator++(const Int &val)   // Pre-increment
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *next = Nucleus::createAdd(current, Nucleus::createConstantInt(1));
+		Nucleus::createStore(next, val.address);
+
+		return val;
+	}
+
+	// Post-decrement: stores val - 1 back into val and returns the value read before the store.
+	RValue<Int> operator--(const Int &val, int)   // Post-decrement
+	{
+		RValue<Int> previous = val;
+
+		Value *one = Nucleus::createConstantInt(1);
+		Nucleus::createStore(Nucleus::createSub(previous.value, one), val.address);
+
+		return previous;
+	}
+
+	// Pre-decrement: loads val, subtracts 1, stores it back and returns the variable itself.
+	const Int &operator--(const Int &val)   // Pre-decrement
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *next = Nucleus::createSub(current, Nucleus::createConstantInt(1));
+		Nucleus::createStore(next, val.address);
+
+		return val;
+	}
+
+	// Signed less-than comparison.
+	RValue<Bool> operator<(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *condition = Nucleus::createICmpSLT(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Signed less-than-or-equal comparison.
+	RValue<Bool> operator<=(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *condition = Nucleus::createICmpSLE(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Signed greater-than comparison.
+	RValue<Bool> operator>(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *condition = Nucleus::createICmpSGT(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Signed greater-than-or-equal comparison.
+	RValue<Bool> operator>=(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *condition = Nucleus::createICmpSGE(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Inequality comparison.
+	RValue<Bool> operator!=(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *condition = Nucleus::createICmpNE(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Equality comparison.
+	RValue<Bool> operator==(const RValue<Int> &lhs, const RValue<Int> &rhs)
+	{
+		Value *condition = Nucleus::createICmpEQ(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Converts a Float to an Int through the x86::cvtss2si wrapper.
+	RValue<Int> RoundInt(const RValue<Float> &cast)
+	{
+		// Disabled alternative without the x86 wrapper:
+	//	return IfThenElse(cast > Float(0), Int(cast + Float(0.5f)), Int(cast - Float(0.5f)));
+
+		RValue<Int> rounded = x86::cvtss2si(cast);
+
+		return rounded;
+	}
+
+	// Returns a pointer to this object.
+	Int *Int::getThis()
+	{
+		Int *self = this;
+
+		return self;
+	}
+
+	// Returns the 32-bit integer type of the Nucleus context.
+	const Type *Int::getType()
+	{
+		const Type *int32 = Type::getInt32Ty(*Nucleus::getContext());
+
+		return int32;
+	}
+
+	// Constructs a Long from an Int value by sign-extending it.
+	Long::Long(const RValue<Int> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createSExt(cast.value, Long::getType()), address);
+	}
+
+	// Constructs a Long from a UInt value by zero-extending it.
+	Long::Long(const RValue<UInt> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createZExt(cast.value, Long::getType()), address);
+	}
+
+	// Default constructor: allocates the stack variable without storing a value.
+	Long::Long()
+	{
+		const Type *stackType = getType();
+		address = Nucleus::allocateStackVariable(stackType);
+	}
+
+	// Constructs a Long holding the given Long value.
+	Long::Long(const RValue<Long> &rhs)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *initial = rhs.value;
+		Nucleus::createStore(initial, address);
+	}
+
+	// Stores the constant rhs into this variable and returns that constant as the
+	// value of the assignment expression.
+	RValue<Long> Long::operator=(int64_t rhs) const
+	{
+		// Return the constant itself rather than the result of createStore(), so
+		// this overload reports the stored value like the other operator= overloads.
+		Value *constant = Nucleus::createConstantLong(rhs);
+		Nucleus::createStore(constant, address);
+
+		return RValue<Long>(constant);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<Long> Long::operator=(const RValue<Long> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Copies the current value of rhs into this variable and returns the loaded value.
+	RValue<Long> Long::operator=(const Long &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+		return RValue<Long>(loaded);
+	}
+
+	// Long addition.
+	RValue<Long> operator+(const RValue<Long> &lhs, const RValue<Long> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<Long>(sum);
+	}
+
+	// Long subtraction.
+	RValue<Long> operator-(const RValue<Long> &lhs, const RValue<Long> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<Long>(difference);
+	}
+
+	// Computes lhs + rhs, stores it back into lhs and returns the result.
+	RValue<Long> operator+=(const Long &lhs, const RValue<Long> &rhs)
+	{
+		RValue<Long> result = lhs + rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs - rhs, stores it back into lhs and returns the result.
+	RValue<Long> operator-=(const Long &lhs, const RValue<Long> &rhs)
+	{
+		RValue<Long> result = lhs - rhs;
+
+		return lhs = result;
+	}
+
+	// Calls the LLVM atomic_load_add intrinsic with pointer x and addend y and
+	// returns the call's result as a Long.
+	// NOTE(review): only the Long type is passed as the intrinsic's overload
+	// type list; confirm that matches the intrinsic's declaration.
+	RValue<Long> AddAtomic(const RValue<Pointer<Long>> &x, const RValue<Long> &y)
+	{
+		const llvm::Type *overload = Long::getType();
+		llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::atomic_load_add, &overload, 1);
+
+		Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+		return RValue<Long>(result);
+	}
+
+	// Returns a pointer to this object.
+	Long *Long::getThis()
+	{
+		Long *self = this;
+
+		return self;
+	}
+
+	// Returns the 64-bit integer type of the Nucleus context.
+	const Type *Long::getType()
+	{
+		const Type *int64 = Type::getInt64Ty(*Nucleus::getContext());
+
+		return int64;
+	}
+
+	// Constructs a Long1 from a referenced UInt: the value is loaded, zero-extended
+	// to the Long type and bit-cast to the Long1 vector type.
+	Long1::Long1(const Reference<UInt> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *narrow = Nucleus::createLoad(cast.address);
+		Value *wide = Nucleus::createZExt(narrow, Long::getType());
+
+		Nucleus::createStore(Nucleus::createBitCast(wide, Long1::getType()), address);
+	}
+
+	// Constructs a Long1 holding the given Long1 value.
+	Long1::Long1(const RValue<Long1> &rhs)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *initial = rhs.value;
+		Nucleus::createStore(initial, address);
+	}
+
+	// Returns a pointer to this object.
+	Long1 *Long1::getThis()
+	{
+		Long1 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type holding one Long element.
+	const Type *Long1::getType()
+	{
+		const Type *vectorType = VectorType::get(Long::getType(), 1);
+
+		return vectorType;
+	}
+
+	// Builds a Long2 from element 1 of x and element 1 of y (shuffle indices 1 and 3).
+	RValue<Long2> UnpackHigh(const RValue<Long2> &x, const RValue<Long2> &y)
+	{
+		Constant *indices[2];
+		indices[0] = Nucleus::createConstantInt(1);
+		indices[1] = Nucleus::createConstantInt(3);
+
+		return RValue<Long2>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(indices, 2)));
+	}
+
+	// Returns a pointer to this object.
+	Long2 *Long2::getThis()
+	{
+		Long2 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type holding two Long elements.
+	const Type *Long2::getType()
+	{
+		const Type *vectorType = VectorType::get(Long::getType(), 2);
+
+		return vectorType;
+	}
+
+	// Constructs a UInt initialized from a function argument.
+	UInt::UInt(Argument *argument)
+	{
+		const Type *stackType = getType();
+		address = Nucleus::allocateStackVariable(stackType);
+
+		Nucleus::createStore(argument, address);
+	}
+
+	// Constructs a UInt from a UShort value by zero-extending it.
+	UInt::UInt(const RValue<UShort> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createZExt(cast.value, UInt::getType()), address);
+	}
+
+	// Constructs a UInt from a Long value by truncating it.
+	UInt::UInt(const RValue<Long> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createTrunc(cast.value, UInt::getType()), address);
+	}
+
+	// Constructs a UInt from a Float value.
+	// NOTE(review): the conversion uses createFPToSI (signed) even though the
+	// destination is unsigned - confirm this is intended for large inputs.
+	UInt::UInt(const RValue<Float> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createFPToSI(cast.value, UInt::getType()), address);
+	}
+
+	// Default constructor: allocates the stack variable without storing a value.
+	UInt::UInt()
+	{
+		const Type *stackType = getType();
+		address = Nucleus::allocateStackVariable(stackType);
+	}
+
+	// Constructs a UInt initialized with the constant x.
+	UInt::UInt(int x)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *constant = Nucleus::createConstantInt(x);
+		Nucleus::createStore(constant, address);
+	}
+
+	// Constructs a UInt initialized with the unsigned constant x.
+	UInt::UInt(unsigned int x)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *constant = Nucleus::createConstantInt(x);
+		Nucleus::createStore(constant, address);
+	}
+
+	// Constructs a UInt holding the given UInt value.
+	UInt::UInt(const RValue<UInt> &rhs)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *initial = rhs.value;
+		Nucleus::createStore(initial, address);
+	}
+
+	// Constructs a UInt from an Int value; the value is stored unchanged.
+	UInt::UInt(const RValue<Int> &rhs)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *initial = rhs.value;
+		Nucleus::createStore(initial, address);
+	}
+
+	// Copy constructor: loads the current value of rhs and stores it in a new variable.
+	UInt::UInt(const UInt &rhs)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Constructs a UInt from an Int variable by loading its value and storing it unchanged.
+	UInt::UInt(const Int &rhs)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores the constant rhs into this variable and returns that constant as the
+	// value of the assignment expression.
+	RValue<UInt> UInt::operator=(unsigned int rhs) const
+	{
+		// Return the constant itself rather than the result of createStore(), so
+		// this overload reports the stored value like the other operator= overloads.
+		Value *constant = Nucleus::createConstantInt(rhs);
+		Nucleus::createStore(constant, address);
+
+		return RValue<UInt>(constant);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<UInt> UInt::operator=(const RValue<UInt> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Stores an Int value into this variable unchanged and returns rhs.
+	RValue<UInt> UInt::operator=(const RValue<Int> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Copies the current value of rhs into this variable and returns the loaded value.
+	RValue<UInt> UInt::operator=(const UInt &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+		return RValue<UInt>(loaded);
+	}
+
+	// Copies the current value of an Int variable into this variable and returns it as a UInt.
+	RValue<UInt> UInt::operator=(const Int &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+		return RValue<UInt>(loaded);
+	}
+
+	// Address-of: wraps this variable's address as a Pointer<UInt> value.
+	RValue<Pointer<UInt>> UInt::operator&()
+	{
+		Value *location = address;
+
+		return RValue<Pointer<UInt>>(location);
+	}
+
+	// UInt addition.
+	RValue<UInt> operator+(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<UInt>(sum);
+	}
+
+	// UInt subtraction.
+	RValue<UInt> operator-(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<UInt>(difference);
+	}
+
+	// UInt multiplication.
+	RValue<UInt> operator*(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<UInt>(product);
+	}
+
+	// UInt division (unsigned).
+	RValue<UInt> operator/(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *quotient = Nucleus::createUDiv(lhs.value, rhs.value);
+
+		return RValue<UInt>(quotient);
+	}
+
+	// UInt remainder (unsigned).
+	RValue<UInt> operator%(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *remainder = Nucleus::createURem(lhs.value, rhs.value);
+
+		return RValue<UInt>(remainder);
+	}
+
+	// UInt bitwise AND.
+	RValue<UInt> operator&(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<UInt>(masked);
+	}
+
+	// UInt bitwise OR.
+	RValue<UInt> operator|(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<UInt>(merged);
+	}
+
+	// UInt bitwise XOR.
+	RValue<UInt> operator^(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<UInt>(toggled);
+	}
+
+	// UInt left shift.
+	RValue<UInt> operator<<(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *shifted = Nucleus::createShl(lhs.value, rhs.value);
+
+		return RValue<UInt>(shifted);
+	}
+
+	// UInt right shift (logical).
+	RValue<UInt> operator>>(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *shifted = Nucleus::createLShr(lhs.value, rhs.value);
+
+		return RValue<UInt>(shifted);
+	}
+
+	// Computes lhs + rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator+=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs + rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs - rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator-=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs - rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs * rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator*=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs * rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs / rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator/=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs / rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs % rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator%=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs % rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs & rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator&=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs & rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs | rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator|=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs | rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs ^ rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator^=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs ^ rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs << rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator<<=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs << rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs >> rhs, stores it back into lhs and returns the result.
+	RValue<UInt> operator>>=(const UInt &lhs, const RValue<UInt> &rhs)
+	{
+		RValue<UInt> result = lhs >> rhs;
+
+		return lhs = result;
+	}
+
+	// Unary plus: returns the operand unchanged.
+	RValue<UInt> operator+(const RValue<UInt> &val)
+	{
+		const RValue<UInt> &unchanged = val;
+
+		return unchanged;
+	}
+
+	// Unary minus: negates the operand.
+	RValue<UInt> operator-(const RValue<UInt> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<UInt>(negated);
+	}
+
+	// Bitwise complement of the operand.
+	RValue<UInt> operator~(const RValue<UInt> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<UInt>(inverted);
+	}
+
+	// Post-increment: stores val + 1 back into val and returns the value read before the store.
+	RValue<UInt> operator++(const UInt &val, int)   // Post-increment
+	{
+		RValue<UInt> previous = val;
+
+		Value *one = Nucleus::createConstantInt(1);
+		Nucleus::createStore(Nucleus::createAdd(previous.value, one), val.address);
+
+		return previous;
+	}
+
+	// Pre-increment: loads val, adds 1, stores it back and returns the variable itself.
+	const UInt &operator++(const UInt &val)   // Pre-increment
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *next = Nucleus::createAdd(current, Nucleus::createConstantInt(1));
+		Nucleus::createStore(next, val.address);
+
+		return val;
+	}
+
+	// Post-decrement: stores val - 1 back into val and returns the value read before the store.
+	RValue<UInt> operator--(const UInt &val, int)   // Post-decrement
+	{
+		RValue<UInt> previous = val;
+
+		Value *one = Nucleus::createConstantInt(1);
+		Nucleus::createStore(Nucleus::createSub(previous.value, one), val.address);
+
+		return previous;
+	}
+
+	// Pre-decrement: loads val, subtracts 1, stores it back and returns the variable itself.
+	const UInt &operator--(const UInt &val)   // Pre-decrement
+	{
+		Value *current = Nucleus::createLoad(val.address);
+		Value *next = Nucleus::createSub(current, Nucleus::createConstantInt(1));
+		Nucleus::createStore(next, val.address);
+
+		return val;
+	}
+
+	// Unsigned less-than comparison.
+	RValue<Bool> operator<(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *condition = Nucleus::createICmpULT(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Unsigned less-than-or-equal comparison.
+	RValue<Bool> operator<=(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *condition = Nucleus::createICmpULE(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Unsigned greater-than comparison.
+	RValue<Bool> operator>(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *condition = Nucleus::createICmpUGT(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Unsigned greater-than-or-equal comparison.
+	RValue<Bool> operator>=(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *condition = Nucleus::createICmpUGE(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Inequality comparison.
+	RValue<Bool> operator!=(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *condition = Nucleus::createICmpNE(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+	// Equality comparison.
+	RValue<Bool> operator==(const RValue<UInt> &lhs, const RValue<UInt> &rhs)
+	{
+		Value *condition = Nucleus::createICmpEQ(lhs.value, rhs.value);
+
+		return RValue<Bool>(condition);
+	}
+
+//	RValue<UInt> RoundUInt(const RValue<Float> &cast)
+//	{
+//		return x86::cvtss2si(cast);   // FIXME: Unsigned
+//
+//	//	return IfThenElse(cast > Float(0), Int(cast + Float(0.5f)), Int(cast - Float(0.5f)));
+//	}
+
+	// Returns a pointer to this object.
+	UInt *UInt::getThis()
+	{
+		UInt *self = this;
+
+		return self;
+	}
+
+	// Returns the 32-bit integer type of the Nucleus context (the same type Int uses).
+	const Type *UInt::getType()
+	{
+		const Type *int32 = Type::getInt32Ty(*Nucleus::getContext());
+
+		return int32;
+	}
+
+	// Constructs an Int2 with both elements set to the given Int: the value is
+	// zero-extended to Long, bit-cast to Int2, and element 0 is replicated by a shuffle.
+	Int2::Int2(const RValue<Int> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *widened = Nucleus::createZExt(cast.value, Long::getType());
+		Value *asVector = Nucleus::createBitCast(widened, Int2::getType());
+
+		// Both result lanes select element 0 of the first shuffle operand.
+		Constant *indices[2];
+		indices[0] = Nucleus::createConstantInt(0);
+		indices[1] = Nucleus::createConstantInt(0);
+
+		Value *broadcast = Nucleus::createShuffleVector(asVector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(indices, 2));
+
+		Nucleus::createStore(broadcast, address);
+	}
+
+	// Constructs an Int2 from an Int4: the Int4 is viewed as a Long2, element 0 is
+	// extracted and bit-cast to Int2.
+	Int2::Int2(const RValue<Int4> &cast)
+	{
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *asLong2 = Nucleus::createBitCast(cast.value, Long2::getType());
+		Value *lowHalf = Nucleus::createExtractElement(asLong2, 0);
+
+		Nucleus::createStore(Nucleus::createBitCast(lowHalf, Int2::getType()), address);
+	}
+
+	// Default constructor: allocates the stack variable without storing a value.
+	Int2::Int2()
+	{
+	//	xy.parent = this;
+		const Type *stackType = getType();
+		address = Nucleus::allocateStackVariable(stackType);
+	}
+
+	// Constructs an Int2 initialized with the constant vector (x, y).
+	Int2::Int2(int x, int y)
+	{
+	//	xy.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Constant *elements[2];
+		elements[0] = Nucleus::createConstantInt(x);
+		elements[1] = Nucleus::createConstantInt(y);
+
+		Nucleus::createStore(Nucleus::createConstantVector(elements, 2), address);
+	}
+
+	// Constructs an Int2 holding the given Int2 value.
+	Int2::Int2(const RValue<Int2> &rhs)
+	{
+	//	xy.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *initial = rhs.value;
+		Nucleus::createStore(initial, address);
+	}
+
+	// Copy constructor: loads the current value of rhs and stores it in a new variable.
+	Int2::Int2(const Int2 &rhs)
+	{
+	//	xy.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<Int2> Int2::operator=(const RValue<Int2> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Copies the current value of rhs into this variable and returns the loaded value.
+	RValue<Int2> Int2::operator=(const Int2 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+		return RValue<Int2>(loaded);
+	}
+
+	// Int2 addition.
+	RValue<Int2> operator+(const RValue<Int2> &lhs, const RValue<Int2> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<Int2>(sum);
+	}
+
+	// Int2 subtraction.
+	RValue<Int2> operator-(const RValue<Int2> &lhs, const RValue<Int2> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<Int2>(difference);
+	}
+
+	// Int2 multiplication.
+	RValue<Int2> operator*(const RValue<Int2> &lhs, const RValue<Int2> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<Int2>(product);
+	}
+
+	// Int2 division (signed).
+	RValue<Int2> operator/(const RValue<Int2> &lhs, const RValue<Int2> &rhs)
+	{
+		Value *quotient = Nucleus::createSDiv(lhs.value, rhs.value);
+
+		return RValue<Int2>(quotient);
+	}
+
+	// Int2 remainder (signed).
+	RValue<Int2> operator%(const RValue<Int2> &lhs, const RValue<Int2> &rhs)
+	{
+		Value *remainder = Nucleus::createSRem(lhs.value, rhs.value);
+
+		return RValue<Int2>(remainder);
+	}
+
+	// Int2 bitwise AND.
+	RValue<Int2> operator&(const RValue<Int2> &lhs, const RValue<Int2> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<Int2>(masked);
+	}
+
+	// Int2 bitwise OR.
+	RValue<Int2> operator|(const RValue<Int2> &lhs, const RValue<Int2> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<Int2>(merged);
+	}
+
+	// Int2 bitwise XOR.
+	RValue<Int2> operator^(const RValue<Int2> &lhs, const RValue<Int2> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<Int2>(toggled);
+	}
+
+	// Left shift by an immediate count, via the x86::pslld wrapper.
+	// The generic Nucleus::createShl form is disabled.
+	RValue<Int2> operator<<(const RValue<Int2> &lhs, unsigned char rhs)
+	{
+		RValue<Int2> shifted = x86::pslld(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Right shift by an immediate count, via the x86::psrad wrapper.
+	// The generic Nucleus::createAShr form is disabled.
+	RValue<Int2> operator>>(const RValue<Int2> &lhs, unsigned char rhs)
+	{
+		RValue<Int2> shifted = x86::psrad(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Left shift by a Long1 count, via the x86::pslld wrapper.
+	// The generic Nucleus::createShl form is disabled.
+	RValue<Int2> operator<<(const RValue<Int2> &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Int2> shifted = x86::pslld(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Right shift by a Long1 count, via the x86::psrad wrapper.
+	// The generic Nucleus::createAShr form is disabled.
+	RValue<Int2> operator>>(const RValue<Int2> &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Int2> shifted = x86::psrad(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Computes lhs + rhs, stores it back into lhs and returns the result.
+	RValue<Int2> operator+=(const Int2 &lhs, const RValue<Int2> &rhs)
+	{
+		RValue<Int2> result = lhs + rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs - rhs, stores it back into lhs and returns the result.
+	RValue<Int2> operator-=(const Int2 &lhs, const RValue<Int2> &rhs)
+	{
+		RValue<Int2> result = lhs - rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs * rhs, stores it back into lhs and returns the result.
+	RValue<Int2> operator*=(const Int2 &lhs, const RValue<Int2> &rhs)
+	{
+		RValue<Int2> result = lhs * rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs / rhs, stores it back into lhs and returns the result.
+	RValue<Int2> operator/=(const Int2 &lhs, const RValue<Int2> &rhs)
+	{
+		RValue<Int2> result = lhs / rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs % rhs, stores it back into lhs and returns the result.
+	RValue<Int2> operator%=(const Int2 &lhs, const RValue<Int2> &rhs)
+	{
+		RValue<Int2> result = lhs % rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs & rhs, stores it back into lhs and returns the result.
+	RValue<Int2> operator&=(const Int2 &lhs, const RValue<Int2> &rhs)
+	{
+		RValue<Int2> result = lhs & rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs | rhs, stores it back into lhs and returns the result.
+	RValue<Int2> operator|=(const Int2 &lhs, const RValue<Int2> &rhs)
+	{
+		RValue<Int2> result = lhs | rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs ^ rhs, stores it back into lhs and returns the result.
+	RValue<Int2> operator^=(const Int2 &lhs, const RValue<Int2> &rhs)
+	{
+		RValue<Int2> result = lhs ^ rhs;
+
+		return lhs = result;
+	}
+
+	// Shifts lhs left by an immediate count, stores the result in lhs and returns it.
+	RValue<Int2> operator<<=(const Int2 &lhs, unsigned char rhs)
+	{
+		RValue<Int2> result = lhs << rhs;
+
+		return lhs = result;
+	}
+
+	// Shifts lhs right by an immediate count, stores the result in lhs and returns it.
+	RValue<Int2> operator>>=(const Int2 &lhs, unsigned char rhs)
+	{
+		RValue<Int2> result = lhs >> rhs;
+
+		return lhs = result;
+	}
+
+	// Shifts lhs left by a Long1 count, stores the result in lhs and returns it.
+	RValue<Int2> operator<<=(const Int2 &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Int2> result = lhs << rhs;
+
+		return lhs = result;
+	}
+
+	// Shifts lhs right by a Long1 count, stores the result in lhs and returns it.
+	RValue<Int2> operator>>=(const Int2 &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Int2> result = lhs >> rhs;
+
+		return lhs = result;
+	}
+
+	// Unary plus: returns the operand unchanged.
+	RValue<Int2> operator+(const RValue<Int2> &val)
+	{
+		const RValue<Int2> &unchanged = val;
+
+		return unchanged;
+	}
+
+	// Unary minus: negates the operand.
+	RValue<Int2> operator-(const RValue<Int2> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<Int2>(negated);
+	}
+
+	// Bitwise complement of the operand.
+	RValue<Int2> operator~(const RValue<Int2> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<Int2>(inverted);
+	}
+
+	// Combines element 0 of x and element 0 of y (shuffle indices 0 and 2) and
+	// returns the pair bit-cast to Long1.
+	RValue<Long1> UnpackLow(const RValue<Int2> &x, const RValue<Int2> &y)
+	{
+		Constant *indices[2];
+		indices[0] = Nucleus::createConstantInt(0);
+		indices[1] = Nucleus::createConstantInt(2);
+
+		Value *interleaved = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(indices, 2));
+		Value *asLong1 = Nucleus::createBitCast(interleaved, Long1::getType());
+
+		return RValue<Long1>(asLong1);
+	}
+	
+	// Combines element 1 of x and element 1 of y (shuffle indices 1 and 3) and
+	// returns the pair bit-cast to Long1.
+	RValue<Long1> UnpackHigh(const RValue<Int2> &x, const RValue<Int2> &y)
+	{
+		Constant *indices[2];
+		indices[0] = Nucleus::createConstantInt(1);
+		indices[1] = Nucleus::createConstantInt(3);
+
+		Value *interleaved = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(indices, 2));
+		Value *asLong1 = Nucleus::createBitCast(interleaved, Long1::getType());
+
+		return RValue<Long1>(asLong1);
+	}
+
+	// Returns element i of an Int2. Index 0 is extracted directly; any other index
+	// is served by moving element 1 into lane 0 with UnpackHigh and extracting that.
+	RValue<Int> Extract(const RValue<Int2> &val, int i)
+	{
+		// FIXME: LLVM does not generate optimal code for the direct form:
+		//	return RValue<Int>(Nucleus::createExtractElement(val.value, i));
+
+		if(i == 0)
+		{
+			return RValue<Int>(Nucleus::createExtractElement(val.value, 0));
+		}
+
+		Int2 val2 = As<Int2>(UnpackHigh(val, val));
+
+		return Extract(val2, 0);
+	}
+
+	// FIXME: Crashes LLVM
+//	RValue<Int2> Insert(const RValue<Int2> &val, const RValue<Int> &element, int i)
+//	{
+//		return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, Nucleus::createConstantInt(i)));
+//	}
+
+	// Returns a pointer to this object.
+	Int2 *Int2::getThis()
+	{
+		Int2 *self = this;
+
+		return self;
+	}
+
+	// Returns the vector type holding two Int elements.
+	const Type *Int2::getType()
+	{
+		const Type *vectorType = VectorType::get(Int::getType(), 2);
+
+		return vectorType;
+	}
+
+	// Default constructor: allocates the stack variable without storing a value.
+	UInt2::UInt2()
+	{
+	//	xy.parent = this;
+		const Type *stackType = getType();
+		address = Nucleus::allocateStackVariable(stackType);
+	}
+
+	// Constructs a UInt2 initialized with the constant vector (x, y).
+	UInt2::UInt2(unsigned int x, unsigned int y)
+	{
+	//	xy.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Constant *elements[2];
+		elements[0] = Nucleus::createConstantInt(x);
+		elements[1] = Nucleus::createConstantInt(y);
+
+		Nucleus::createStore(Nucleus::createConstantVector(elements, 2), address);
+	}
+
+	// Constructs a UInt2 holding the given UInt2 value.
+	UInt2::UInt2(const RValue<UInt2> &rhs)
+	{
+	//	xy.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *initial = rhs.value;
+		Nucleus::createStore(initial, address);
+	}
+
+	// Copy constructor: loads the current value of rhs and stores it in a new variable.
+	UInt2::UInt2(const UInt2 &rhs)
+	{
+	//	xy.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<UInt2> UInt2::operator=(const RValue<UInt2> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Copies the current value of rhs into this variable and returns the loaded value.
+	RValue<UInt2> UInt2::operator=(const UInt2 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+		return RValue<UInt2>(loaded);
+	}
+
+	// UInt2 addition.
+	RValue<UInt2> operator+(const RValue<UInt2> &lhs, const RValue<UInt2> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<UInt2>(sum);
+	}
+
+	// UInt2 subtraction.
+	RValue<UInt2> operator-(const RValue<UInt2> &lhs, const RValue<UInt2> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<UInt2>(difference);
+	}
+
+	// UInt2 multiplication.
+	RValue<UInt2> operator*(const RValue<UInt2> &lhs, const RValue<UInt2> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<UInt2>(product);
+	}
+
+	// UInt2 division (unsigned).
+	RValue<UInt2> operator/(const RValue<UInt2> &lhs, const RValue<UInt2> &rhs)
+	{
+		Value *quotient = Nucleus::createUDiv(lhs.value, rhs.value);
+
+		return RValue<UInt2>(quotient);
+	}
+
+	// UInt2 remainder (unsigned).
+	RValue<UInt2> operator%(const RValue<UInt2> &lhs, const RValue<UInt2> &rhs)
+	{
+		Value *remainder = Nucleus::createURem(lhs.value, rhs.value);
+
+		return RValue<UInt2>(remainder);
+	}
+
+	// UInt2 bitwise AND.
+	RValue<UInt2> operator&(const RValue<UInt2> &lhs, const RValue<UInt2> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<UInt2>(masked);
+	}
+
+	// UInt2 bitwise OR.
+	RValue<UInt2> operator|(const RValue<UInt2> &lhs, const RValue<UInt2> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<UInt2>(merged);
+	}
+
+	// UInt2 bitwise XOR.
+	RValue<UInt2> operator^(const RValue<UInt2> &lhs, const RValue<UInt2> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<UInt2>(toggled);
+	}
+
+	// Left shift by an immediate count: the operand is reinterpreted as Int2,
+	// shifted with x86::pslld and reinterpreted back as UInt2.
+	// The generic Nucleus::createShl form is disabled.
+	RValue<UInt2> operator<<(const RValue<UInt2> &lhs, unsigned char rhs)
+	{
+		RValue<Int2> shifted = x86::pslld(As<Int2>(lhs), rhs);
+
+		return As<UInt2>(shifted);
+	}
+
+	// Right shift by an immediate count, via the x86::psrld wrapper.
+	// The generic Nucleus::createLShr form is disabled.
+	RValue<UInt2> operator>>(const RValue<UInt2> &lhs, unsigned char rhs)
+	{
+		RValue<UInt2> shifted = x86::psrld(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Left shift by a Long1 count: the operand is reinterpreted as Int2, shifted
+	// with x86::pslld and reinterpreted back as UInt2.
+	// The generic Nucleus::createShl form is disabled.
+	RValue<UInt2> operator<<(const RValue<UInt2> &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<Int2> shifted = x86::pslld(As<Int2>(lhs), rhs);
+
+		return As<UInt2>(shifted);
+	}
+
+	// Right shift by a Long1 count, via the x86::psrld wrapper.
+	// The generic Nucleus::createLShr form is disabled.
+	RValue<UInt2> operator>>(const RValue<UInt2> &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<UInt2> shifted = x86::psrld(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Computes lhs + rhs, stores it back into lhs and returns the result.
+	RValue<UInt2> operator+=(const UInt2 &lhs, const RValue<UInt2> &rhs)
+	{
+		RValue<UInt2> result = lhs + rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs - rhs, stores it back into lhs and returns the result.
+	RValue<UInt2> operator-=(const UInt2 &lhs, const RValue<UInt2> &rhs)
+	{
+		RValue<UInt2> result = lhs - rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs * rhs, stores it back into lhs and returns the result.
+	RValue<UInt2> operator*=(const UInt2 &lhs, const RValue<UInt2> &rhs)
+	{
+		RValue<UInt2> result = lhs * rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs / rhs, stores it back into lhs and returns the result.
+	RValue<UInt2> operator/=(const UInt2 &lhs, const RValue<UInt2> &rhs)
+	{
+		RValue<UInt2> result = lhs / rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs % rhs, stores it back into lhs and returns the result.
+	RValue<UInt2> operator%=(const UInt2 &lhs, const RValue<UInt2> &rhs)
+	{
+		RValue<UInt2> result = lhs % rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs & rhs, stores it back into lhs and returns the result.
+	RValue<UInt2> operator&=(const UInt2 &lhs, const RValue<UInt2> &rhs)
+	{
+		RValue<UInt2> result = lhs & rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs | rhs, stores it back into lhs and returns the result.
+	RValue<UInt2> operator|=(const UInt2 &lhs, const RValue<UInt2> &rhs)
+	{
+		RValue<UInt2> result = lhs | rhs;
+
+		return lhs = result;
+	}
+
+	// Computes lhs ^ rhs, stores it back into lhs and returns the result.
+	RValue<UInt2> operator^=(const UInt2 &lhs, const RValue<UInt2> &rhs)
+	{
+		RValue<UInt2> result = lhs ^ rhs;
+
+		return lhs = result;
+	}
+
+	// Shifts lhs left by an immediate count, stores the result in lhs and returns it.
+	RValue<UInt2> operator<<=(const UInt2 &lhs, unsigned char rhs)
+	{
+		RValue<UInt2> result = lhs << rhs;
+
+		return lhs = result;
+	}
+
+	// Shifts lhs right by an immediate count, stores the result in lhs and returns it.
+	RValue<UInt2> operator>>=(const UInt2 &lhs, unsigned char rhs)
+	{
+		RValue<UInt2> result = lhs >> rhs;
+
+		return lhs = result;
+	}
+
+	// Shifts lhs left by a Long1 count, stores the result in lhs and returns it.
+	RValue<UInt2> operator<<=(const UInt2 &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<UInt2> result = lhs << rhs;
+
+		return lhs = result;
+	}
+
+	// Shifts lhs right by a Long1 count, stores the result in lhs and returns it.
+	RValue<UInt2> operator>>=(const UInt2 &lhs, const RValue<Long1> &rhs)
+	{
+		RValue<UInt2> result = lhs >> rhs;
+
+		return lhs = result;
+	}
+
+	// Unary plus: returns the operand unchanged.
+	RValue<UInt2> operator+(const RValue<UInt2> &val)
+	{
+		const RValue<UInt2> &unchanged = val;
+
+		return unchanged;
+	}
+
+	// Unary minus: negates the operand.
+	RValue<UInt2> operator-(const RValue<UInt2> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<UInt2>(negated);
+	}
+
+	// Bitwise complement of the operand.
+	RValue<UInt2> operator~(const RValue<UInt2> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<UInt2>(inverted);
+	}
+
+	// Returns a pointer to this object.
+	UInt2 *UInt2::getThis()
+	{
+		UInt2 *self = this;
+
+		return self;
+	}
+
+	// UInt2 uses the same type as Int2.
+	const Type *UInt2::getType()
+	{
+		const Type *shared = Int2::getType();
+
+		return shared;
+	}
+
+	// Constructs an Int4 from a Float4 using a float-to-signed-int conversion.
+	Int4::Int4(const RValue<Float4> &cast)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		// Disabled alternative: x86::cvttps2dq(cast).value
+		Value *converted = Nucleus::createFPToSI(cast.value, Int4::getType());
+
+		Nucleus::createStore(converted, address);
+	}
+
+	// Default constructor: allocates the stack variable without storing a value.
+	Int4::Int4()
+	{
+	//	xyzw.parent = this;
+		const Type *stackType = getType();
+		address = Nucleus::allocateStackVariable(stackType);
+	}
+
+	// Constructs an Int4 with all four elements set to xyzw.
+	Int4::Int4(int xyzw)
+	{
+		this->constant(xyzw, xyzw, xyzw, xyzw);
+	}
+
+	// Constructs an Int4 as (x, yzw, yzw, yzw).
+	Int4::Int4(int x, int yzw)
+	{
+		this->constant(x, yzw, yzw, yzw);
+	}
+
+	// Constructs an Int4 as (x, y, zw, zw).
+	Int4::Int4(int x, int y, int zw)
+	{
+		this->constant(x, y, zw, zw);
+	}
+
+	// Constructs an Int4 as (x, y, z, w).
+	Int4::Int4(int x, int y, int z, int w)
+	{
+		this->constant(x, y, z, w);
+	}
+
+	// Shared helper of the integer constructors: allocates the stack variable and
+	// stores the constant vector (x, y, z, w) into it.
+	void Int4::constant(int x, int y, int z, int w)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Constant *elements[4];
+		elements[0] = Nucleus::createConstantInt(x);
+		elements[1] = Nucleus::createConstantInt(y);
+		elements[2] = Nucleus::createConstantInt(z);
+		elements[3] = Nucleus::createConstantInt(w);
+
+		Value *vector = Nucleus::createConstantVector(elements, 4);
+		Nucleus::createStore(vector, address);
+	}
+
+	// Constructs an Int4 holding the given Int4 value.
+	Int4::Int4(const RValue<Int4> &rhs)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *initial = rhs.value;
+		Nucleus::createStore(initial, address);
+	}
+
+	// Copy constructor: loads the current value of rhs and stores it in a new variable.
+	Int4::Int4(const Int4 &rhs)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Nucleus::createStore(Nucleus::createLoad(rhs.address), address);
+	}
+
+	// Stores rhs into this variable and returns rhs.
+	RValue<Int4> Int4::operator=(const RValue<Int4> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Copies the current value of rhs into this variable and returns the loaded value.
+	RValue<Int4> Int4::operator=(const Int4 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+		return RValue<Int4>(loaded);
+	}
+
+	// Int4 addition.
+	RValue<Int4> operator+(const RValue<Int4> &lhs, const RValue<Int4> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<Int4>(sum);
+	}
+
+	// Int4 subtraction.
+	RValue<Int4> operator-(const RValue<Int4> &lhs, const RValue<Int4> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<Int4>(difference);
+	}
+
+	// Int4 multiplication.
+	RValue<Int4> operator*(const RValue<Int4> &lhs, const RValue<Int4> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<Int4>(product);
+	}
+
+	// Int4 division (signed).
+	RValue<Int4> operator/(const RValue<Int4> &lhs, const RValue<Int4> &rhs)
+	{
+		Value *quotient = Nucleus::createSDiv(lhs.value, rhs.value);
+
+		return RValue<Int4>(quotient);
+	}
+
+	// Int4 remainder (signed).
+	RValue<Int4> operator%(const RValue<Int4> &lhs, const RValue<Int4> &rhs)
+	{
+		Value *remainder = Nucleus::createSRem(lhs.value, rhs.value);
+
+		return RValue<Int4>(remainder);
+	}
+
+	// Int4 bitwise AND.
+	RValue<Int4> operator&(const RValue<Int4> &lhs, const RValue<Int4> &rhs)
+	{
+		Value *masked = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<Int4>(masked);
+	}
+
+	// Int4 bitwise OR.
+	RValue<Int4> operator|(const RValue<Int4> &lhs, const RValue<Int4> &rhs)
+	{
+		Value *merged = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<Int4>(merged);
+	}
+
+	// Int4 bitwise XOR.
+	RValue<Int4> operator^(const RValue<Int4> &lhs, const RValue<Int4> &rhs)
+	{
+		Value *toggled = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<Int4>(toggled);
+	}
+
+	// Left shift by an immediate count, via the x86::pslld wrapper.
+	// The generic Nucleus::createShl form is disabled.
+	RValue<Int4> operator<<(const RValue<Int4> &lhs, unsigned char rhs)
+	{
+		RValue<Int4> shifted = x86::pslld(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Right shift by a shift count known when the routine is built; implemented with x86::psrad
+	// (the disabled generic alternative below uses createAShr, i.e. an arithmetic shift).
+	RValue<Int4> operator>>(const RValue<Int4> &lhs, unsigned char rhs)
+	{
+	//	return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
+
+		RValue<Int4> shifted = x86::psrad(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Adds rhs to the variable lhs, stores the result back and returns it.
+	RValue<Int4> operator+=(const Int4 &lhs, const RValue<Int4> &rhs)
+	{
+		RValue<Int4> sum = lhs + rhs;
+
+		return lhs = sum;
+	}
+
+	// Subtracts rhs from the variable lhs, stores the result back and returns it.
+	RValue<Int4> operator-=(const Int4 &lhs, const RValue<Int4> &rhs)
+	{
+		RValue<Int4> difference = lhs - rhs;
+
+		return lhs = difference;
+	}
+
+	// Multiplies the variable lhs by rhs, stores the result back and returns it.
+	RValue<Int4> operator*=(const Int4 &lhs, const RValue<Int4> &rhs)
+	{
+		RValue<Int4> product = lhs * rhs;
+
+		return lhs = product;
+	}
+
+	// Divides the variable lhs by rhs, stores the result back and returns it.
+	RValue<Int4> operator/=(const Int4 &lhs, const RValue<Int4> &rhs)
+	{
+		RValue<Int4> quotient = lhs / rhs;
+
+		return lhs = quotient;
+	}
+
+	// Replaces the variable lhs with the remainder of lhs / rhs and returns it.
+	RValue<Int4> operator%=(const Int4 &lhs, const RValue<Int4> &rhs)
+	{
+		RValue<Int4> remainder = lhs % rhs;
+
+		return lhs = remainder;
+	}
+
+	// ANDs rhs into the variable lhs, stores the result back and returns it.
+	RValue<Int4> operator&=(const Int4 &lhs, const RValue<Int4> &rhs)
+	{
+		RValue<Int4> masked = lhs & rhs;
+
+		return lhs = masked;
+	}
+
+	// ORs rhs into the variable lhs, stores the result back and returns it.
+	RValue<Int4> operator|=(const Int4 &lhs, const RValue<Int4> &rhs)
+	{
+		RValue<Int4> combined = lhs | rhs;
+
+		return lhs = combined;
+	}
+
+	// XORs rhs into the variable lhs, stores the result back and returns it.
+	RValue<Int4> operator^=(const Int4 &lhs, const RValue<Int4> &rhs)
+	{
+		RValue<Int4> toggled = lhs ^ rhs;
+
+		return lhs = toggled;
+	}
+
+	// Shifts the variable lhs left by rhs bits, stores the result back and returns it.
+	RValue<Int4> operator<<=(const Int4 &lhs, unsigned char rhs)
+	{
+		RValue<Int4> shifted = lhs << rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shifts the variable lhs right by rhs bits, stores the result back and returns it.
+	RValue<Int4> operator>>=(const Int4 &lhs, unsigned char rhs)
+	{
+		RValue<Int4> shifted = lhs >> rhs;
+
+		return lhs = shifted;
+	}
+
+	// Unary plus: yields the operand unchanged.
+	RValue<Int4> operator+(const RValue<Int4> &val)
+	{
+		return RValue<Int4>(val);
+	}
+
+	// Unary minus (Nucleus::createNeg).
+	RValue<Int4> operator-(const RValue<Int4> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<Int4>(negated);
+	}
+
+	// Bitwise complement (Nucleus::createNot).
+	RValue<Int4> operator~(const RValue<Int4> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<Int4>(inverted);
+	}
+
+	// Converts four floats to four integers using x86::cvtps2dq.
+	RValue<Int4> RoundInt(const RValue<Float4> &cast)
+	{
+		RValue<Int4> rounded = x86::cvtps2dq(cast);
+
+		return rounded;
+	}
+
+	// Packs two Int4 values into one Short8 using x86::packssdw.
+	RValue<Short8> Pack(const RValue<Int4> &x, const RValue<Int4> &y)
+	{
+		RValue<Short8> packed = x86::packssdw(x, y);
+
+		return packed;
+	}
+
+	// Joins two Int2 values into one Int4: lo becomes 64-bit element 0 and hi becomes
+	// 64-bit element 1 of an intermediate Long2, which is then reinterpreted as Int4.
+	RValue<Int4> Concatenate(const RValue<Int2> &lo, const RValue<Int2> &hi)
+	{
+		Value *lowerHalf = Nucleus::createBitCast(lo.value, Long::getType());
+		Value *upperHalf = Nucleus::createBitCast(hi.value, Long::getType());
+
+		Value *joined = UndefValue::get(Long2::getType());
+		joined = Nucleus::createInsertElement(joined, lowerHalf, 0);
+		joined = Nucleus::createInsertElement(joined, upperHalf, 1);
+
+		return RValue<Int4>(Nucleus::createBitCast(joined, Int4::getType()));
+	}
+
+	// Returns element i of x as a scalar Int.
+	RValue<Int> Extract(const RValue<Int4> &x, int i)
+	{
+		Value *element = Nucleus::createExtractElement(x.value, i);
+
+		return RValue<Int>(element);
+	}
+
+	// Returns a copy of x in which element i has been replaced by the given scalar.
+	RValue<Int4> Insert(const RValue<Int4> &x, const RValue<Int> &element, int i)
+	{
+		Value *updated = Nucleus::createInsertElement(x.value, element.value, i);
+
+		return RValue<Int4>(updated);
+	}
+
+	// Reinterprets x as Float4 and passes it to x86::movmskps.
+	RValue<Int> SignMask(const RValue<Int4> &x)
+	{
+		RValue<Int> mask = x86::movmskps(As<Float4>(x));
+
+		return mask;
+	}
+
+	// Rearranges the elements of x as encoded by select (see Nucleus::createSwizzle).
+	RValue<Int4> Swizzle(const RValue<Int4> &x, unsigned char select)
+	{
+		Value *swizzled = Nucleus::createSwizzle(x.value, select);
+
+		return RValue<Int4>(swizzled);
+	}
+
+	// Returns the C++ address of this object.
+	Int4 *Int4::getThis()
+	{
+		Int4 *self = this;
+
+		return self;
+	}
+
+	// Type descriptor for Int4: a vector of four Int elements.
+	const Type *Int4::getType()
+	{
+		const Type *elementType = Int::getType();
+
+		return VectorType::get(elementType, 4);
+	}
+
+	// Constructs a UInt4 from a Float4 using a float-to-unsigned-integer conversion
+	// (Nucleus::createFPToUI).
+	UInt4::UInt4(const RValue<Float4> &cast)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(UInt4::getType());
+
+		Value *converted = Nucleus::createFPToUI(cast.value, UInt4::getType());
+		Nucleus::createStore(converted, address);
+	}
+
+	// Default constructor: allocates the stack variable without storing an initial value.
+	UInt4::UInt4()
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(UInt4::getType());
+	}
+
+	// Constructs a UInt4 holding the constant elements (x, y, z, w).
+	UInt4::UInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(UInt4::getType());
+
+		Constant *elements[4] =
+		{
+			Nucleus::createConstantInt(x),
+			Nucleus::createConstantInt(y),
+			Nucleus::createConstantInt(z),
+			Nucleus::createConstantInt(w)
+		};
+
+		Nucleus::createStore(Nucleus::createConstantVector(elements, 4), address);
+	}
+
+	// Constructs a UInt4 variable on the stack and initializes it with the value of rhs.
+	UInt4::UInt4(const RValue<UInt4> &rhs)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(UInt4::getType());
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: allocates a new stack variable and copies the value currently held by rhs.
+	UInt4::UInt4(const UInt4 &rhs)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(UInt4::getType());
+
+		Value *source = Nucleus::createLoad(rhs.address);
+		Nucleus::createStore(source, address);
+	}
+
+	// Stores rhs into this variable; the assigned value is returned so assignments can be chained.
+	RValue<UInt4> UInt4::operator=(const RValue<UInt4> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Copies the value held by rhs into this variable and returns the loaded value.
+	RValue<UInt4> UInt4::operator=(const UInt4 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<UInt4>(loaded);
+	}
+
+	// Addition of two UInt4 values (Nucleus::createAdd).
+	RValue<UInt4> operator+(const RValue<UInt4> &lhs, const RValue<UInt4> &rhs)
+	{
+		Value *sum = Nucleus::createAdd(lhs.value, rhs.value);
+
+		return RValue<UInt4>(sum);
+	}
+
+	// Subtraction of two UInt4 values (Nucleus::createSub).
+	RValue<UInt4> operator-(const RValue<UInt4> &lhs, const RValue<UInt4> &rhs)
+	{
+		Value *difference = Nucleus::createSub(lhs.value, rhs.value);
+
+		return RValue<UInt4>(difference);
+	}
+
+	// Multiplication of two UInt4 values (Nucleus::createMul).
+	RValue<UInt4> operator*(const RValue<UInt4> &lhs, const RValue<UInt4> &rhs)
+	{
+		Value *product = Nucleus::createMul(lhs.value, rhs.value);
+
+		return RValue<UInt4>(product);
+	}
+
+	// Unsigned division of two UInt4 values (Nucleus::createUDiv).
+	RValue<UInt4> operator/(const RValue<UInt4> &lhs, const RValue<UInt4> &rhs)
+	{
+		Value *quotient = Nucleus::createUDiv(lhs.value, rhs.value);
+
+		return RValue<UInt4>(quotient);
+	}
+
+	// Unsigned remainder of two UInt4 values (Nucleus::createURem).
+	RValue<UInt4> operator%(const RValue<UInt4> &lhs, const RValue<UInt4> &rhs)
+	{
+		Value *remainder = Nucleus::createURem(lhs.value, rhs.value);
+
+		return RValue<UInt4>(remainder);
+	}
+
+	// Bitwise AND of two UInt4 values (Nucleus::createAnd).
+	RValue<UInt4> operator&(const RValue<UInt4> &lhs, const RValue<UInt4> &rhs)
+	{
+		Value *conjunction = Nucleus::createAnd(lhs.value, rhs.value);
+
+		return RValue<UInt4>(conjunction);
+	}
+
+	// Bitwise OR of two UInt4 values (Nucleus::createOr).
+	RValue<UInt4> operator|(const RValue<UInt4> &lhs, const RValue<UInt4> &rhs)
+	{
+		Value *disjunction = Nucleus::createOr(lhs.value, rhs.value);
+
+		return RValue<UInt4>(disjunction);
+	}
+
+	// Bitwise XOR of two UInt4 values (Nucleus::createXor).
+	RValue<UInt4> operator^(const RValue<UInt4> &lhs, const RValue<UInt4> &rhs)
+	{
+		Value *exclusive = Nucleus::createXor(lhs.value, rhs.value);
+
+		return RValue<UInt4>(exclusive);
+	}
+
+	// Left shift by a shift count known when the routine is built. The operand is
+	// reinterpreted as Int4 so the signed x86::pslld helper can be reused.
+	RValue<UInt4> operator<<(const RValue<UInt4> &lhs, unsigned char rhs)
+	{
+	//	return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
+
+		RValue<UInt4> shifted = As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
+
+		return shifted;
+	}
+
+	// Right shift by a shift count known when the routine is built; implemented with x86::psrld
+	// (the disabled generic alternative below uses createLShr, i.e. a logical shift).
+	RValue<UInt4> operator>>(const RValue<UInt4> &lhs, unsigned char rhs)
+	{
+	//	return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
+
+		RValue<UInt4> shifted = x86::psrld(lhs, rhs);
+
+		return shifted;
+	}
+
+	// Adds rhs to the variable lhs, stores the result back and returns it.
+	RValue<UInt4> operator+=(const UInt4 &lhs, const RValue<UInt4> &rhs)
+	{
+		RValue<UInt4> sum = lhs + rhs;
+
+		return lhs = sum;
+	}
+
+	// Subtracts rhs from the variable lhs, stores the result back and returns it.
+	RValue<UInt4> operator-=(const UInt4 &lhs, const RValue<UInt4> &rhs)
+	{
+		RValue<UInt4> difference = lhs - rhs;
+
+		return lhs = difference;
+	}
+
+	// Multiplies the variable lhs by rhs, stores the result back and returns it.
+	RValue<UInt4> operator*=(const UInt4 &lhs, const RValue<UInt4> &rhs)
+	{
+		RValue<UInt4> product = lhs * rhs;
+
+		return lhs = product;
+	}
+
+	// Divides the variable lhs by rhs, stores the result back and returns it.
+	RValue<UInt4> operator/=(const UInt4 &lhs, const RValue<UInt4> &rhs)
+	{
+		RValue<UInt4> quotient = lhs / rhs;
+
+		return lhs = quotient;
+	}
+
+	// Replaces the variable lhs with the remainder of lhs / rhs and returns it.
+	RValue<UInt4> operator%=(const UInt4 &lhs, const RValue<UInt4> &rhs)
+	{
+		RValue<UInt4> remainder = lhs % rhs;
+
+		return lhs = remainder;
+	}
+
+	// ANDs rhs into the variable lhs, stores the result back and returns it.
+	RValue<UInt4> operator&=(const UInt4 &lhs, const RValue<UInt4> &rhs)
+	{
+		RValue<UInt4> masked = lhs & rhs;
+
+		return lhs = masked;
+	}
+
+	// ORs rhs into the variable lhs, stores the result back and returns it.
+	RValue<UInt4> operator|=(const UInt4 &lhs, const RValue<UInt4> &rhs)
+	{
+		RValue<UInt4> combined = lhs | rhs;
+
+		return lhs = combined;
+	}
+
+	// XORs rhs into the variable lhs, stores the result back and returns it.
+	RValue<UInt4> operator^=(const UInt4 &lhs, const RValue<UInt4> &rhs)
+	{
+		RValue<UInt4> toggled = lhs ^ rhs;
+
+		return lhs = toggled;
+	}
+
+	// Shifts the variable lhs left by rhs bits, stores the result back and returns it.
+	RValue<UInt4> operator<<=(const UInt4 &lhs, unsigned char rhs)
+	{
+		RValue<UInt4> shifted = lhs << rhs;
+
+		return lhs = shifted;
+	}
+
+	// Shifts the variable lhs right by rhs bits, stores the result back and returns it.
+	RValue<UInt4> operator>>=(const UInt4 &lhs, unsigned char rhs)
+	{
+		RValue<UInt4> shifted = lhs >> rhs;
+
+		return lhs = shifted;
+	}
+
+	// Unary plus: yields the operand unchanged.
+	RValue<UInt4> operator+(const RValue<UInt4> &val)
+	{
+		return RValue<UInt4>(val);
+	}
+
+	// Unary minus (Nucleus::createNeg).
+	RValue<UInt4> operator-(const RValue<UInt4> &val)
+	{
+		Value *negated = Nucleus::createNeg(val.value);
+
+		return RValue<UInt4>(negated);
+	}
+
+	// Bitwise complement (Nucleus::createNot).
+	RValue<UInt4> operator~(const RValue<UInt4> &val)
+	{
+		Value *inverted = Nucleus::createNot(val.value);
+
+		return RValue<UInt4>(inverted);
+	}
+
+	// Packs two UInt4 values into one UShort8 using x86::packusdw.
+	RValue<UShort8> Pack(const RValue<UInt4> &x, const RValue<UInt4> &y)
+	{
+		RValue<UShort8> packed = x86::packusdw(x, y);   // FIXME: Fallback required
+
+		return packed;
+	}
+
+	// Joins two UInt2 values into one UInt4: lo becomes 64-bit element 0 and hi becomes
+	// 64-bit element 1 of an intermediate Long2, which is then reinterpreted as UInt4.
+	RValue<UInt4> Concatenate(const RValue<UInt2> &lo, const RValue<UInt2> &hi)
+	{
+		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
+		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
+
+		Value *long2 = UndefValue::get(Long2::getType());
+		long2 = Nucleus::createInsertElement(long2, loLong, 0);
+		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
+
+		// Cast to the type that is actually returned (the original cast to Int4::getType()).
+		Value *uint4 = Nucleus::createBitCast(long2, UInt4::getType());
+
+		return RValue<UInt4>(uint4);
+	}
+
+	// Returns the C++ address of this object.
+	UInt4 *UInt4::getThis()
+	{
+		UInt4 *self = this;
+
+		return self;
+	}
+
+	// Type descriptor for UInt4: a vector of four UInt elements.
+	const Type *UInt4::getType()
+	{
+		const Type *elementType = UInt::getType();
+
+		return VectorType::get(elementType, 4);
+	}
+
+	// Constructs a Float from an Int using a signed-integer-to-float conversion
+	// (Nucleus::createSIToFP).
+	Float::Float(const RValue<Int> &cast)
+	{
+		address = Nucleus::allocateStackVariable(Float::getType());
+
+		Value *converted = Nucleus::createSIToFP(cast.value, Float::getType());
+		Nucleus::createStore(converted, address);
+	}
+
+	// Default constructor: allocates the stack variable without storing an initial value.
+	Float::Float()
+	{
+		address = Nucleus::allocateStackVariable(Float::getType());
+	}
+
+	// Constructs a Float variable initialized with the constant x.
+	Float::Float(float x)
+	{
+		address = Nucleus::allocateStackVariable(Float::getType());
+
+		Constant *constant = Nucleus::createConstantFloat(x);
+		Nucleus::createStore(constant, address);
+	}
+
+	// Constructs a Float variable on the stack and initializes it with the value of rhs.
+	Float::Float(const RValue<Float> &rhs)
+	{
+		address = Nucleus::allocateStackVariable(Float::getType());
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: allocates a new stack variable and copies the value currently held by rhs.
+	Float::Float(const Float &rhs)
+	{
+		address = Nucleus::allocateStackVariable(Float::getType());
+
+		Value *source = Nucleus::createLoad(rhs.address);
+		Nucleus::createStore(source, address);
+	}
+
+	// Stores rhs into this variable; the assigned value is returned so assignments can be chained.
+	RValue<Float> Float::operator=(const RValue<Float> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Copies the value held by rhs into this variable and returns the loaded value.
+	RValue<Float> Float::operator=(const Float &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<Float>(loaded);
+	}
+
+	// Address-of: wraps this variable's stack address in a Pointer<Float> rvalue.
+	RValue<Pointer<Float>> Float::operator&()
+	{
+		RValue<Pointer<Float>> pointer(address);
+
+		return pointer;
+	}
+
+	// Floating-point addition (Nucleus::createFAdd).
+	RValue<Float> operator+(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *sum = Nucleus::createFAdd(lhs.value, rhs.value);
+
+		return RValue<Float>(sum);
+	}
+
+	// Floating-point subtraction (Nucleus::createFSub).
+	RValue<Float> operator-(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *difference = Nucleus::createFSub(lhs.value, rhs.value);
+
+		return RValue<Float>(difference);
+	}
+
+	// Floating-point multiplication (Nucleus::createFMul).
+	RValue<Float> operator*(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *product = Nucleus::createFMul(lhs.value, rhs.value);
+
+		return RValue<Float>(product);
+	}
+
+	// Floating-point division (Nucleus::createFDiv).
+	RValue<Float> operator/(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *quotient = Nucleus::createFDiv(lhs.value, rhs.value);
+
+		return RValue<Float>(quotient);
+	}
+
+	// Adds rhs to the variable lhs, stores the result back and returns it.
+	RValue<Float> operator+=(const Float &lhs, const RValue<Float> &rhs)
+	{
+		RValue<Float> sum = lhs + rhs;
+
+		return lhs = sum;
+	}
+
+	// Subtracts rhs from the variable lhs, stores the result back and returns it.
+	RValue<Float> operator-=(const Float &lhs, const RValue<Float> &rhs)
+	{
+		RValue<Float> difference = lhs - rhs;
+
+		return lhs = difference;
+	}
+
+	// Multiplies the variable lhs by rhs, stores the result back and returns it.
+	RValue<Float> operator*=(const Float &lhs, const RValue<Float> &rhs)
+	{
+		RValue<Float> product = lhs * rhs;
+
+		return lhs = product;
+	}
+
+	// Divides the variable lhs by rhs, stores the result back and returns it.
+	RValue<Float> operator/=(const Float &lhs, const RValue<Float> &rhs)
+	{
+		RValue<Float> quotient = lhs / rhs;
+
+		return lhs = quotient;
+	}
+
+	// Unary plus: yields the operand unchanged.
+	RValue<Float> operator+(const RValue<Float> &val)
+	{
+		return RValue<Float>(val);
+	}
+
+	// Floating-point negation (Nucleus::createFNeg).
+	RValue<Float> operator-(const RValue<Float> &val)
+	{
+		Value *negated = Nucleus::createFNeg(val.value);
+
+		return RValue<Float>(negated);
+	}
+
+	// Less-than comparison (Nucleus::createFCmpOLT).
+	RValue<Bool> operator<(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *isLess = Nucleus::createFCmpOLT(lhs.value, rhs.value);
+
+		return RValue<Bool>(isLess);
+	}
+
+	// Less-than-or-equal comparison (Nucleus::createFCmpOLE).
+	RValue<Bool> operator<=(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *isLessOrEqual = Nucleus::createFCmpOLE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isLessOrEqual);
+	}
+
+	// Greater-than comparison (Nucleus::createFCmpOGT).
+	RValue<Bool> operator>(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *isGreater = Nucleus::createFCmpOGT(lhs.value, rhs.value);
+
+		return RValue<Bool>(isGreater);
+	}
+
+	// Greater-than-or-equal comparison (Nucleus::createFCmpOGE).
+	RValue<Bool> operator>=(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *isGreaterOrEqual = Nucleus::createFCmpOGE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isGreaterOrEqual);
+	}
+
+	// Inequality comparison (Nucleus::createFCmpONE).
+	// NOTE(review): presumably LLVM's ordered 'one' predicate, which is false when either
+	// operand is NaN (unlike C's !=) - confirm against Nucleus::createFCmpONE.
+	RValue<Bool> operator!=(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *isDifferent = Nucleus::createFCmpONE(lhs.value, rhs.value);
+
+		return RValue<Bool>(isDifferent);
+	}
+
+	// Equality comparison (Nucleus::createFCmpOEQ).
+	RValue<Bool> operator==(const RValue<Float> &lhs, const RValue<Float> &rhs)
+	{
+		Value *isEqual = Nucleus::createFCmpOEQ(lhs.value, rhs.value);
+
+		return RValue<Bool>(isEqual);
+	}
+
+	// Absolute value of a scalar float.
+	//
+	// Implemented by clearing the sign bit, the same way Abs(RValue<Float4>) does. The previous
+	// select-based form, IfThenElse(x > Float(0), x, -x), took the negation branch for +0.0
+	// (yielding -0.0) and for NaN.
+	RValue<Float> Abs(const RValue<Float> &x)
+	{
+		Value *bits = Nucleus::createBitCast(x.value, Int::getType());
+		Value *magnitude = Nucleus::createAnd(bits, Nucleus::createConstantInt(0x7FFFFFFF));
+
+		return RValue<Float>(Nucleus::createBitCast(magnitude, Float::getType()));
+	}
+
+	// Returns x if x > y, otherwise y.
+	RValue<Float> Max(const RValue<Float> &x, const RValue<Float> &y)
+	{
+		RValue<Bool> xIsGreater = x > y;
+
+		return IfThenElse(xIsGreater, x, y);
+	}
+
+	// Returns x if x < y, otherwise y.
+	RValue<Float> Min(const RValue<Float> &x, const RValue<Float> &y)
+	{
+		RValue<Bool> xIsLess = x < y;
+
+		return IfThenElse(xIsLess, x, y);
+	}
+
+	// Reciprocal via x86::rcpss.
+	// NOTE(review): the _pp suffix presumably stands for partial precision - confirm.
+	RValue<Float> Rcp_pp(const RValue<Float> &x)
+	{
+		RValue<Float> reciprocal = x86::rcpss(x);
+
+		return reciprocal;
+	}
+	
+	// Reciprocal square root via x86::rsqrtss.
+	// NOTE(review): the _pp suffix presumably stands for partial precision - confirm.
+	RValue<Float> RcpSqrt_pp(const RValue<Float> &x)
+	{
+		RValue<Float> reciprocalRoot = x86::rsqrtss(x);
+
+		return reciprocalRoot;
+	}
+
+	// Square root via x86::sqrtss.
+	RValue<Float> Sqrt(const RValue<Float> &x)
+	{
+		RValue<Float> root = x86::sqrtss(x);
+
+		return root;
+	}
+
+	// Fractional part of x, computed as x - floor(x) when SSE4.1 is available.
+	// Without SSE4.1 the scalar is routed through the Float4 overload of Fraction
+	// and the x component of the result is returned.
+	RValue<Float> Fraction(const RValue<Float> &x)
+	{
+		if(!CPUID::supportsSSE4_1())
+		{
+			return Float4(Fraction(Float4(x))).x;
+		}
+
+		return x - x86::floorss(x);
+	}
+
+	// Floor of x, using x86::floorss when SSE4.1 is available.
+	// Without SSE4.1 the scalar is routed through the Float4 overload of Floor
+	// and the x component of the result is returned.
+	RValue<Float> Floor(const RValue<Float> &x)
+	{
+		if(!CPUID::supportsSSE4_1())
+		{
+			return Float4(Floor(Float4(x))).x;
+		}
+
+		return x86::floorss(x);
+	}
+
+	// Returns the C++ address of this object (Float overloads operator& to yield a
+	// Pointer<Float> rvalue instead).
+	Float *Float::getThis()
+	{
+		Float *self = this;
+
+		return self;
+	}
+
+	// Type descriptor for Float: the float type of the Nucleus context.
+	const Type *Float::getType()
+	{
+		const Type *floatType = Type::getFloatTy(*Nucleus::getContext());
+
+		return floatType;
+	}
+
+	// Constructs a Float2 from a Float4: the Float4 is viewed as two 64-bit elements and
+	// element 0 is reinterpreted as a Float2.
+	Float2::Float2(const RValue<Float4> &cast)
+	{
+	//	xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(Float2::getType());
+
+		Value *halves = Nucleus::createBitCast(cast.value, Long2::getType());
+		Value *lowerHalf = Nucleus::createExtractElement(halves, 0);
+		Value *pair = Nucleus::createBitCast(lowerHalf, Float2::getType());
+
+		Nucleus::createStore(pair, address);
+	}
+
+	// Returns the C++ address of this object.
+	Float2 *Float2::getThis()
+	{
+		Float2 *self = this;
+
+		return self;
+	}
+
+	// Type descriptor for Float2: a vector of two Float elements.
+	const Type *Float2::getType()
+	{
+		const Type *elementType = Float::getType();
+
+		return VectorType::get(elementType, 2);
+	}
+
+	// Constructs a Float4 from four unsigned bytes.
+	//
+	// A direct Nucleus::createUIToFP() from the Byte4 value was marked "FIXME: Crashes", so the
+	// bytes are first zero-extended to 32-bit integers (pmovzxbd with SSE4.1, otherwise two
+	// interleaving shuffles with zero vectors) and then converted with cvtdq2ps.
+	Float4::Float4(const RValue<Byte4> &cast)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		// Move the four packed bytes into element 0 of an Int4
+		Value *packed = Nucleus::createBitCast(cast.value, Int::getType());
+		Value *int4 = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), packed, 0);
+
+		Value *widened;
+
+		if(CPUID::supportsSSE4_1())
+		{
+			widened = x86::pmovzxbd(RValue<Int4>(int4)).value;
+		}
+		else
+		{
+			// Bytes -> shorts: interleave bytes 0..7 with bytes of the zero vector (indices 16..23)
+			Constant *byteSwizzle[16];
+
+			for(int i = 0; i < 8; i++)
+			{
+				byteSwizzle[2 * i + 0] = Nucleus::createConstantInt(i);
+				byteSwizzle[2 * i + 1] = Nucleus::createConstantInt(16 + i);
+			}
+
+			Value *bytes = Nucleus::createBitCast(int4, Byte16::getType());
+			Value *interleavedBytes = Nucleus::createShuffleVector(bytes, Nucleus::createNullValue(Byte16::getType()), Nucleus::createConstantVector(byteSwizzle, 16));
+
+			// Shorts -> ints: interleave shorts 0..3 with shorts of the zero vector (indices 8..11)
+			Constant *shortSwizzle[8];
+
+			for(int i = 0; i < 4; i++)
+			{
+				shortSwizzle[2 * i + 0] = Nucleus::createConstantInt(i);
+				shortSwizzle[2 * i + 1] = Nucleus::createConstantInt(8 + i);
+			}
+
+			Value *shorts = Nucleus::createBitCast(interleavedBytes, Short8::getType());
+			widened = Nucleus::createShuffleVector(shorts, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(shortSwizzle, 8));
+		}
+
+		Value *integers = Nucleus::createBitCast(widened, Int4::getType());
+	//	Value *converted = Nucleus::createSIToFP(integers, Float4::getType());
+		Value *converted = x86::cvtdq2ps(RValue<Int4>(integers)).value;
+
+		Nucleus::createStore(converted, address);
+	}
+
+	// Constructs a Float4 from four signed bytes.
+	//
+	// A direct Nucleus::createSIToFP() from the SByte4 value was marked "FIXME: Crashes", so the
+	// bytes are first sign-extended to 32-bit integers and then converted with cvtdq2ps.
+	// With SSE4.1 pmovsxbd does the extension; otherwise every byte is replicated into all four
+	// bytes of its 32-bit element and an arithmetic shift right by 24 leaves the sign-extended byte.
+	Float4::Float4(const RValue<SByte4> &cast)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		// Move the four packed bytes into element 0 of an Int4
+		Value *packed = Nucleus::createBitCast(cast.value, Int::getType());
+		Value *int4 = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), packed, 0);
+
+		Value *extended;
+
+		if(CPUID::supportsSSE4_1())
+		{
+			extended = x86::pmovsxbd(RValue<Int4>(int4)).value;
+		}
+		else
+		{
+			// Duplicate each of bytes 0..7: 0, 0, 1, 1, ..., 7, 7
+			Constant *byteSwizzle[16];
+
+			for(int i = 0; i < 8; i++)
+			{
+				byteSwizzle[2 * i + 0] = Nucleus::createConstantInt(i);
+				byteSwizzle[2 * i + 1] = Nucleus::createConstantInt(i);
+			}
+
+			Value *bytes = Nucleus::createBitCast(int4, Byte16::getType());
+			Value *doubledBytes = Nucleus::createShuffleVector(bytes, bytes, Nucleus::createConstantVector(byteSwizzle, 16));
+
+			// Duplicate each of shorts 0..3: 0, 0, 1, 1, 2, 2, 3, 3
+			Constant *shortSwizzle[8];
+
+			for(int i = 0; i < 4; i++)
+			{
+				shortSwizzle[2 * i + 0] = Nucleus::createConstantInt(i);
+				shortSwizzle[2 * i + 1] = Nucleus::createConstantInt(i);
+			}
+
+			Value *shorts = Nucleus::createBitCast(doubledBytes, Short8::getType());
+			Value *doubledShorts = Nucleus::createShuffleVector(shorts, shorts, Nucleus::createConstantVector(shortSwizzle, 8));
+
+			Value *replicated = Nucleus::createBitCast(doubledShorts, Int4::getType());
+		//	extended = Nucleus::createAShr(replicated, Nucleus::createConstantInt(24));
+			extended = x86::psrad(RValue<Int4>(replicated), 24).value;
+		}
+
+	//	Value *converted = Nucleus::createSIToFP(extended, Float4::getType());
+		Value *converted = x86::cvtdq2ps(RValue<Int4>(extended)).value;
+
+		Nucleus::createStore(converted, address);
+	}
+
+	// Constructs a Float4 from four signed 16-bit integers.
+	//
+	// A direct Nucleus::createSIToFP() from the Short4 value was marked "FIXME: Crashes", so the
+	// shorts are first sign-extended to 32-bit integers and then converted with cvtdq2ps.
+	//
+	// Without SSE4.1 each short s is duplicated into both halves of its 32-bit element, giving
+	// (s << 16) | (s & 0xFFFF). An arithmetic shift right by 16 then recovers the sign-extended
+	// value exactly. The previous code instead converted the duplicated pattern to float and
+	// multiplied by 1/65536, which leaves the low half in the result (e.g. 1 became ~1.0000153).
+	Float4::Float4(const RValue<Short4> &cast)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		// Place the 64-bit Short4 in the lower half of a 128-bit vector
+		Value *long2 = UndefValue::get(Long2::getType());
+		Value *element = Nucleus::createBitCast(cast.value, Long::getType());
+		long2 = Nucleus::createInsertElement(long2, element, 0);
+		RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
+
+		Value *extended;
+
+		if(CPUID::supportsSSE4_1())
+		{
+			extended = x86::pmovsxwd(vector).value;
+		}
+		else
+		{
+			Value *shorts = Nucleus::createBitCast(vector.value, Short8::getType());
+
+			// Duplicate each of shorts 0..3: 0, 0, 1, 1, 2, 2, 3, 3
+			Constant *swizzle[8];
+
+			for(int i = 0; i < 4; i++)
+			{
+				swizzle[2 * i + 0] = Nucleus::createConstantInt(i);
+				swizzle[2 * i + 1] = Nucleus::createConstantInt(i);
+			}
+
+			Value *doubled = Nucleus::createShuffleVector(shorts, shorts, Nucleus::createConstantVector(swizzle, 8));
+			Value *replicated = Nucleus::createBitCast(doubled, Int4::getType());
+
+			// Arithmetic shift propagates the sign bit, so this works for negative shorts too
+			extended = x86::psrad(RValue<Int4>(replicated), 16).value;
+		}
+
+	//	Value *converted = Nucleus::createSIToFP(extended, Float4::getType());
+		Value *converted = x86::cvtdq2ps(RValue<Int4>(extended)).value;
+
+		Nucleus::createStore(converted, address);
+	}
+
+	// Constructs a Float4 from four unsigned 16-bit integers.
+	//
+	// A direct Nucleus::createUIToFP() from the UShort4 value was marked "FIXME: Crashes", so the
+	// shorts are first zero-extended to 32-bit integers (pmovzxwd with SSE4.1, otherwise an
+	// interleaving shuffle with a zero vector) and then converted with cvtdq2ps.
+	Float4::Float4(const RValue<UShort4> &cast)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		// Place the 64-bit UShort4 in the lower half of a 128-bit vector
+		Value *wide = UndefValue::get(Long2::getType());
+		Value *lowerHalf = Nucleus::createBitCast(cast.value, Long::getType());
+		wide = Nucleus::createInsertElement(wide, lowerHalf, 0);
+		RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(wide, Int4::getType()));
+
+		Value *widened;
+
+		if(CPUID::supportsSSE4_1())
+		{
+			widened = x86::pmovzxwd(vector).value;
+		}
+		else
+		{
+			Value *shorts = Nucleus::createBitCast(vector.value, Short8::getType());
+
+			// Interleave shorts 0..3 with shorts of the zero vector (indices 8..11)
+			Constant *swizzle[8];
+
+			for(int i = 0; i < 4; i++)
+			{
+				swizzle[2 * i + 0] = Nucleus::createConstantInt(i);
+				swizzle[2 * i + 1] = Nucleus::createConstantInt(8 + i);
+			}
+
+			widened = Nucleus::createShuffleVector(shorts, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(swizzle, 8));
+		}
+
+		Value *integers = Nucleus::createBitCast(widened, Int4::getType());
+	//	Value *converted = Nucleus::createSIToFP(integers, Float4::getType());
+		Value *converted = x86::cvtdq2ps(RValue<Int4>(integers)).value;
+
+		Nucleus::createStore(converted, address);
+	}
+
+	// Constructs a Float4 from an Int4 using a signed-integer-to-float conversion
+	// (Nucleus::createSIToFP).
+	Float4::Float4(const RValue<Int4> &cast)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *converted = Nucleus::createSIToFP(cast.value, Float4::getType());
+	//	Value *converted = x86::cvtdq2ps(cast).value;
+
+		Nucleus::createStore(converted, address);
+	}
+
+	// Constructs a Float4 from a UInt4 using an unsigned-integer-to-float conversion
+	// (Nucleus::createUIToFP).
+	Float4::Float4(const RValue<UInt4> &cast)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *converted = Nucleus::createUIToFP(cast.value, Float4::getType());
+		Nucleus::createStore(converted, address);
+	}
+
+	// Default constructor: allocates the stack variable without storing an initial value.
+	Float4::Float4()
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(Float4::getType());
+	}
+	
+	// Constructs a constant Float4 with all four components set to xyzw.
+	Float4::Float4(float xyzw)
+	{
+		this->constant(xyzw, xyzw, xyzw, xyzw);
+	}
+
+	// Constructs the constant Float4 (x, yzw, yzw, yzw).
+	Float4::Float4(float x, float yzw)
+	{
+		this->constant(x, yzw, yzw, yzw);
+	}
+
+	// Constructs the constant Float4 (x, y, zw, zw).
+	Float4::Float4(float x, float y, float zw)
+	{
+		this->constant(x, y, zw, zw);
+	}
+
+	// Constructs the constant Float4 (x, y, z, w).
+	Float4::Float4(float x, float y, float z, float w)
+	{
+		this->constant(x, y, z, w);
+	}
+
+	// Shared initialization for the float constructors: allocates the stack variable and
+	// stores the constant vector (x, y, z, w) into it.
+	void Float4::constant(float x, float y, float z, float w)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Constant *components[4] =
+		{
+			Nucleus::createConstantFloat(x),
+			Nucleus::createConstantFloat(y),
+			Nucleus::createConstantFloat(z),
+			Nucleus::createConstantFloat(w)
+		};
+
+		Nucleus::createStore(Nucleus::createConstantVector(components, 4), address);
+	}
+
+	// Constructs a Float4 variable on the stack and initializes it with the value of rhs.
+	Float4::Float4(const RValue<Float4> &rhs)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(Float4::getType());
+		Nucleus::createStore(rhs.value, address);
+	}
+
+	// Copy constructor: allocates a new stack variable and copies the value currently held by rhs.
+	Float4::Float4(const Float4 &rhs)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(Float4::getType());
+
+		Value *source = Nucleus::createLoad(rhs.address);
+		Nucleus::createStore(source, address);
+	}
+
+	// Constructs a Float4 with the scalar rhs replicated into all four components.
+	Float4::Float4(const RValue<Float> &rhs)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		// Insert into an undefined vector. The previous code loaded the base vector from this
+		// variable's own stack slot, which has not been written yet at this point.
+		Value *insert = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), rhs.value, 0);
+
+		// Broadcast element 0 to every component
+		Constant *swizzle[4];
+		swizzle[0] = Nucleus::createConstantInt(0);
+		swizzle[1] = Nucleus::createConstantInt(0);
+		swizzle[2] = Nucleus::createConstantInt(0);
+		swizzle[3] = Nucleus::createConstantInt(0);
+
+		Value *replicate = Nucleus::createShuffleVector(insert, UndefValue::get(Float4::getType()), Nucleus::createConstantVector(swizzle, 4));
+
+		Nucleus::createStore(replicate, address);
+	}
+
+	// Constructs a Float4 with the value held by the Float variable rhs replicated into all
+	// four components.
+	Float4::Float4(const Float &rhs)
+	{
+		xyzw.parent = this;
+		address = Nucleus::allocateStackVariable(getType());
+
+		Value *element = Nucleus::createLoad(rhs.address);
+
+		// Insert into an undefined vector. The previous code loaded the base vector from this
+		// variable's own stack slot, which has not been written yet at this point.
+		Value *insert = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), element, 0);
+
+		// Broadcast element 0 to every component
+		Constant *swizzle[4];
+		swizzle[0] = Nucleus::createConstantInt(0);
+		swizzle[1] = Nucleus::createConstantInt(0);
+		swizzle[2] = Nucleus::createConstantInt(0);
+		swizzle[3] = Nucleus::createConstantInt(0);
+
+		Value *replicate = Nucleus::createShuffleVector(insert, UndefValue::get(Float4::getType()), Nucleus::createConstantVector(swizzle, 4));
+
+		Nucleus::createStore(replicate, address);
+	}
+
+	// Assigns the constant x to all four components.
+	RValue<Float4> Float4::operator=(float x) const
+	{
+		Float4 replicated(x, x, x, x);
+
+		return *this = replicated;
+	}
+
+	// Stores rhs into this variable; the assigned value is returned so assignments can be chained.
+	RValue<Float4> Float4::operator=(const RValue<Float4> &rhs) const
+	{
+		Value *assigned = rhs.value;
+		Nucleus::createStore(assigned, address);
+
+		return rhs;
+	}
+
+	// Copies the value held by rhs into this variable and returns the loaded value.
+	RValue<Float4> Float4::operator=(const Float4 &rhs) const
+	{
+		Value *loaded = Nucleus::createLoad(rhs.address);
+
+		Nucleus::createStore(loaded, address);
+
+		return RValue<Float4>(loaded);
+	}
+
+	// Assigns the scalar rhs to all four components (via the replicating Float4 constructor).
+	RValue<Float4> Float4::operator=(const RValue<Float> &rhs) const
+	{
+		Float4 replicated(rhs);
+
+		return *this = replicated;
+	}
+
+	// Assigns the value held by the Float variable rhs to all four components
+	// (via the replicating Float4 constructor).
+	RValue<Float4> Float4::operator=(const Float &rhs) const
+	{
+		Float4 replicated(rhs);
+
+		return *this = replicated;
+	}
+
+	// Address-of: wraps this variable's stack address in a Pointer<Float4> rvalue.
+	RValue<Pointer<Float4>> Float4::operator&()
+	{
+		RValue<Pointer<Float4>> pointer(address);
+
+		return pointer;
+	}
+
+	// Floating-point addition of two Float4 values (Nucleus::createFAdd).
+	RValue<Float4> operator+(const RValue<Float4> &lhs, const RValue<Float4> &rhs)
+	{
+		Value *sum = Nucleus::createFAdd(lhs.value, rhs.value);
+
+		return RValue<Float4>(sum);
+	}
+
+	// Floating-point subtraction of two Float4 values (Nucleus::createFSub).
+	RValue<Float4> operator-(const RValue<Float4> &lhs, const RValue<Float4> &rhs)
+	{
+		Value *difference = Nucleus::createFSub(lhs.value, rhs.value);
+
+		return RValue<Float4>(difference);
+	}
+
+	// Floating-point multiplication of two Float4 values (Nucleus::createFMul).
+	RValue<Float4> operator*(const RValue<Float4> &lhs, const RValue<Float4> &rhs)
+	{
+		Value *product = Nucleus::createFMul(lhs.value, rhs.value);
+
+		return RValue<Float4>(product);
+	}
+
+	// Floating-point division of two Float4 values (Nucleus::createFDiv).
+	RValue<Float4> operator/(const RValue<Float4> &lhs, const RValue<Float4> &rhs)
+	{
+		Value *quotient = Nucleus::createFDiv(lhs.value, rhs.value);
+
+		return RValue<Float4>(quotient);
+	}
+
+	// Floating-point remainder of two Float4 values (Nucleus::createFRem).
+	RValue<Float4> operator%(const RValue<Float4> &lhs, const RValue<Float4> &rhs)
+	{
+		Value *remainder = Nucleus::createFRem(lhs.value, rhs.value);
+
+		return RValue<Float4>(remainder);
+	}
+
+	// Adds rhs to the variable lhs, stores the result back and returns it.
+	RValue<Float4> operator+=(const Float4 &lhs, const RValue<Float4> &rhs)
+	{
+		RValue<Float4> sum = lhs + rhs;
+
+		return lhs = sum;
+	}
+
+	// Subtracts rhs from the variable lhs, stores the result back and returns it.
+	RValue<Float4> operator-=(const Float4 &lhs, const RValue<Float4> &rhs)
+	{
+		RValue<Float4> difference = lhs - rhs;
+
+		return lhs = difference;
+	}
+
+	// Multiplies the variable lhs by rhs, stores the result back and returns it.
+	RValue<Float4> operator*=(const Float4 &lhs, const RValue<Float4> &rhs)
+	{
+		RValue<Float4> product = lhs * rhs;
+
+		return lhs = product;
+	}
+
+	// Divides the variable lhs by rhs, stores the result back and returns it.
+	RValue<Float4> operator/=(const Float4 &lhs, const RValue<Float4> &rhs)
+	{
+		RValue<Float4> quotient = lhs / rhs;
+
+		return lhs = quotient;
+	}
+
+	// Replaces the variable lhs with the remainder of lhs / rhs and returns it.
+	RValue<Float4> operator%=(const Float4 &lhs, const RValue<Float4> &rhs)
+	{
+		RValue<Float4> remainder = lhs % rhs;
+
+		return lhs = remainder;
+	}
+
+	// Unary plus: yields the operand unchanged.
+	RValue<Float4> operator+(const RValue<Float4> &val)
+	{
+		return RValue<Float4>(val);
+	}
+
+	// Floating-point negation (Nucleus::createFNeg).
+	RValue<Float4> operator-(const RValue<Float4> &val)
+	{
+		Value *negated = Nucleus::createFNeg(val.value);
+
+		return RValue<Float4>(negated);
+	}
+
+	// Absolute value of each component: the vector is reinterpreted as Int4 and ANDed with
+	// 0x7FFFFFFF in every element, which clears the top (sign) bit, then reinterpreted back.
+	RValue<Float4> Abs(const RValue<Float4> &x)
+	{
+		Constant *signClear = Nucleus::createConstantInt(0x7FFFFFFF);
+		Constant *mask[4] = {signClear, signClear, signClear, signClear};
+
+		Value *bits = Nucleus::createBitCast(x.value, Int4::getType());
+		Value *magnitude = Nucleus::createAnd(bits, Nucleus::createConstantVector(mask, 4));
+
+		return RValue<Float4>(Nucleus::createBitCast(magnitude, Float4::getType()));
+	}
+
+	// Component-wise maximum; forwards to x86::maxps.
+	RValue<Float4> Max(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		const RValue<Float4> larger = x86::maxps(x, y);
+
+		return larger;
+	}
+
+	// Component-wise minimum; forwards to x86::minps.
+	RValue<Float4> Min(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		const RValue<Float4> smaller = x86::minps(x, y);
+
+		return smaller;
+	}
+
+	// Reciprocal of each component; forwards to x86::rcpps.
+	RValue<Float4> Rcp_pp(const RValue<Float4> &x)
+	{
+		const RValue<Float4> reciprocal = x86::rcpps(x);
+
+		return reciprocal;
+	}
+	
+	// Reciprocal square root of each component; forwards to x86::rsqrtps.
+	RValue<Float4> RcpSqrt_pp(const RValue<Float4> &x)
+	{
+		const RValue<Float4> reciprocalRoot = x86::rsqrtps(x);
+
+		return reciprocalRoot;
+	}
+
+	// Square root of each component; forwards to x86::sqrtps.
+	RValue<Float4> Sqrt(const RValue<Float4> &x)
+	{
+		const RValue<Float4> root = x86::sqrtps(x);
+
+		return root;
+	}
+
+	// Replaces component i of the variable val with element.
+	// Loads the current vector, inserts the scalar, writes the result back to
+	// val and returns the value of val.
+	RValue<Float4> Insert(const Float4 &val, const RValue<Float> &element, int i)
+	{
+		llvm::Value *current = Nucleus::createLoad(val.address);
+		llvm::Value *updated = Nucleus::createInsertElement(current, element.value, i);
+
+		val = RValue<Float4>(updated);
+
+		return RValue<Float4>(val);
+	}
+
+	// Returns component i of x as a scalar.
+	RValue<Float> Extract(const RValue<Float4> &x, int i)
+	{
+		Value *component = Nucleus::createExtractElement(x.value, i);
+
+		return RValue<Float>(component);
+	}
+
+	// Rearranges the components of x according to the select code,
+	// as interpreted by Nucleus::createSwizzle.
+	RValue<Float4> Swizzle(const RValue<Float4> &x, unsigned char select)
+	{
+		Value *swizzled = Nucleus::createSwizzle(x.value, select);
+
+		return RValue<Float4>(swizzled);
+	}
+
+	// Builds a vector whose two low components are picked from x and whose two
+	// high components are picked from y. Each 2-bit field of imm (lowest field
+	// first) selects the source component for one result lane.
+	RValue<Float4> ShuffleLowHigh(const RValue<Float4> &x, const RValue<Float4> &y, unsigned char imm)
+	{
+		Constant *indices[4];
+
+		for(int lane = 0; lane < 4; lane++)
+		{
+			// Shuffle indices 0..3 address x, 4..7 address y
+			const int sourceBase = (lane < 2) ? 0 : 4;
+
+			indices[lane] = Nucleus::createConstantInt(((imm >> (2 * lane)) & 0x03) + sourceBase);
+		}
+
+		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(indices, 4)));
+	}
+
+	// Interleaves the two low components of x and y: result = (x0, y0, x1, y1).
+	RValue<Float4> UnpackLow(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		// Shuffle indices 0..3 address x, 4..7 address y
+		static const int interleave[4] = {0, 4, 1, 5};
+
+		Constant *indices[4];
+
+		for(int lane = 0; lane < 4; lane++)
+		{
+			indices[lane] = Nucleus::createConstantInt(interleave[lane]);
+		}
+
+		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(indices, 4)));
+	}
+
+	// Interleaves the two high components of x and y: result = (x2, y2, x3, y3).
+	RValue<Float4> UnpackHigh(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		// Shuffle indices 0..3 address x, 4..7 address y
+		static const int interleave[4] = {2, 6, 3, 7};
+
+		Constant *indices[4];
+
+		for(int lane = 0; lane < 4; lane++)
+		{
+			indices[lane] = Nucleus::createConstantInt(interleave[lane]);
+		}
+
+		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(indices, 4)));
+	}
+	
+	// Combines the current contents of lhs with rhs using Nucleus::createMask
+	// and the given select code, stores the combined vector back into lhs and
+	// returns it.
+	RValue<Float4> Mask(Float4 &lhs, const RValue<Float4> &rhs, unsigned char select)
+	{
+		Value *current = Nucleus::createLoad(lhs.address);
+		Value *combined = Nucleus::createMask(current, rhs.value, select);
+
+		Nucleus::createStore(combined, lhs.address);
+
+		return RValue<Float4>(combined);
+	}
+
+	// Collects the sign bits of x into an integer; forwards to x86::movmskps.
+	RValue<Int> SignMask(const RValue<Float4> &x)
+	{
+		const RValue<Int> signs = x86::movmskps(x);
+
+		return signs;
+	}
+
+	// Per-component x == y (FCmpOEQ). The boolean result is sign-extended to an
+	// Int4, so a true lane has all bits set.
+	// Disabled alternative: As<Int4>(x86::cmpeqps(x, y))
+	RValue<Int4> CmpEQ(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		Value *condition = Nucleus::createFCmpOEQ(x.value, y.value);
+
+		return RValue<Int4>(Nucleus::createSExt(condition, Int4::getType()));
+	}
+
+	// Per-component x < y (FCmpOLT), sign-extended to an Int4 lane mask.
+	// Disabled alternative: As<Int4>(x86::cmpltps(x, y))
+	RValue<Int4> CmpLT(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		Value *condition = Nucleus::createFCmpOLT(x.value, y.value);
+
+		return RValue<Int4>(Nucleus::createSExt(condition, Int4::getType()));
+	}
+
+	// Per-component x <= y (FCmpOLE), sign-extended to an Int4 lane mask.
+	// Disabled alternative: As<Int4>(x86::cmpleps(x, y))
+	RValue<Int4> CmpLE(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		Value *condition = Nucleus::createFCmpOLE(x.value, y.value);
+
+		return RValue<Int4>(Nucleus::createSExt(condition, Int4::getType()));
+	}
+
+	// Per-component x != y (FCmpONE), sign-extended to an Int4 lane mask.
+	// NOTE(review): FCmpONE is presumably LLVM's ordered predicate, so NaN
+	// lanes may compare differently than with the disabled cmpneqps path - confirm.
+	// Disabled alternative: As<Int4>(x86::cmpneqps(x, y))
+	RValue<Int4> CmpNEQ(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		Value *condition = Nucleus::createFCmpONE(x.value, y.value);
+
+		return RValue<Int4>(Nucleus::createSExt(condition, Int4::getType()));
+	}
+
+	// Per-component "not less than", implemented as x >= y (FCmpOGE) and
+	// sign-extended to an Int4 lane mask.
+	// NOTE(review): FCmpOGE is presumably LLVM's ordered predicate, so NaN
+	// lanes may compare differently than with the disabled cmpnltps path - confirm.
+	// Disabled alternative: As<Int4>(x86::cmpnltps(x, y))
+	RValue<Int4> CmpNLT(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		Value *condition = Nucleus::createFCmpOGE(x.value, y.value);
+
+		return RValue<Int4>(Nucleus::createSExt(condition, Int4::getType()));
+	}
+
+	// Per-component "not less or equal", implemented as x > y (FCmpOGT) and
+	// sign-extended to an Int4 lane mask.
+	// NOTE(review): FCmpOGT is presumably LLVM's ordered predicate, so NaN
+	// lanes may compare differently than with the disabled cmpnleps path - confirm.
+	// Disabled alternative: As<Int4>(x86::cmpnleps(x, y))
+	RValue<Int4> CmpNLE(const RValue<Float4> &x, const RValue<Float4> &y)
+	{
+		Value *condition = Nucleus::createFCmpOGT(x.value, y.value);
+
+		return RValue<Int4>(Nucleus::createSExt(condition, Int4::getType()));
+	}
+
+	// Fractional part of each component, i.e. x - floor(x).
+	// With SSE4.1 this uses floorps directly. Otherwise the value is truncated
+	// through an integer conversion, and 1 is added to lanes whose signed
+	// fraction came out negative.
+	RValue<Float4> Fraction(const RValue<Float4> &x)
+	{
+		if(!CPUID::supportsSSE4_1())
+		{
+			Float4 frc = x - Float4(Int4(x));   // Signed fraction
+
+			// The comparison mask selects the bits of 1.0f for negative lanes, 0 elsewhere
+			return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0, 0, 0, 0), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
+		}
+
+		return x - x86::floorps(x);
+	}
+
+	// Rounds each component of x toward negative infinity.
+	// With SSE4.1 this maps directly onto floorps. Without it, the result is
+	// derived from Fraction(), which yields x - floor(x) per component.
+	//
+	// The previous fallback truncated x and then added 1 to every lane whose
+	// truncated value was negative. That rounds negative inputs the wrong way
+	// (e.g. -1.5 became 0 and -2 became -1).
+	RValue<Float4> Floor(const RValue<Float4> &x)
+	{
+		if(CPUID::supportsSSE4_1())
+		{
+			return x86::floorps(x);
+		}
+		else
+		{
+			// floor(x) = x - frac(x), with frac(x) = x - floor(x)
+			return x - Fraction(x);
+		}
+	}
+
+	// Returns a pointer to this object.
+	Float4 *Float4::getThis()
+	{
+		Float4 *self = this;
+
+		return self;
+	}
+
+	// Returns the LLVM type for Float4: a vector of four Float elements.
+	const Type *Float4::getType()
+	{
+		const Type *element = Float::getType();
+
+		return VectorType::get(element, 4);
+	}
+
+	// Advances a byte pointer by a constant offset (emitted as a GEP).
+	RValue<Pointer<Byte>> operator+(const RValue<Pointer<Byte>> &lhs, int offset)
+	{
+		Constant *index = Nucleus::createConstantInt(offset);
+
+		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, index));
+	}
+
+	// Advances a byte pointer by a run-time Int offset (emitted as a GEP).
+	RValue<Pointer<Byte>> operator+(const RValue<Pointer<Byte>> &lhs, const RValue<Int> &offset)
+	{
+		Value *address = Nucleus::createGEP(lhs.value, offset.value);
+
+		return RValue<Pointer<Byte>>(address);
+	}
+
+	// Advances a byte pointer by a run-time UInt offset (emitted as a GEP).
+	RValue<Pointer<Byte>> operator+(const RValue<Pointer<Byte>> &lhs, const RValue<UInt> &offset)
+	{
+		Value *address = Nucleus::createGEP(lhs.value, offset.value);
+
+		return RValue<Pointer<Byte>>(address);
+	}
+
+	// Advances the pointer variable lhs by a constant offset and returns the assigned value.
+	RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, int offset)
+	{
+		const RValue<Pointer<Byte>> advanced = lhs + offset;
+
+		return lhs = advanced;
+	}
+
+	// Advances the pointer variable lhs by an Int offset and returns the assigned value.
+	RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, const RValue<Int> &offset)
+	{
+		const RValue<Pointer<Byte>> advanced = lhs + offset;
+
+		return lhs = advanced;
+	}
+
+	// Advances the pointer variable lhs by a UInt offset and returns the assigned value.
+	RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, const RValue<UInt> &offset)
+	{
+		const RValue<Pointer<Byte>> advanced = lhs + offset;
+
+		return lhs = advanced;
+	}
+
+	// Moves a byte pointer back by a constant offset; implemented as adding the negated offset.
+	RValue<Pointer<Byte>> operator-(const RValue<Pointer<Byte>> &lhs, int offset)
+	{
+		const int negated = -offset;
+
+		return lhs + negated;
+	}
+
+	// Moves a byte pointer back by an Int offset; implemented as adding the negated offset.
+	RValue<Pointer<Byte>> operator-(const RValue<Pointer<Byte>> &lhs, const RValue<Int> &offset)
+	{
+		const RValue<Int> negated = -offset;
+
+		return lhs + negated;
+	}
+
+	// Moves a byte pointer back by a UInt offset; implemented as adding the negated offset.
+	RValue<Pointer<Byte>> operator-(const RValue<Pointer<Byte>> &lhs, const RValue<UInt> &offset)
+	{
+		const RValue<UInt> negated = -offset;
+
+		return lhs + negated;
+	}
+
+	// Moves the pointer variable lhs back by a constant offset and returns the assigned value.
+	RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, int offset)
+	{
+		const RValue<Pointer<Byte>> rewound = lhs - offset;
+
+		return lhs = rewound;
+	}
+
+	// Moves the pointer variable lhs back by an Int offset and returns the assigned value.
+	RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, const RValue<Int> &offset)
+	{
+		const RValue<Pointer<Byte>> rewound = lhs - offset;
+
+		return lhs = rewound;
+	}
+
+	// Moves the pointer variable lhs back by a UInt offset and returns the assigned value.
+	RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, const RValue<UInt> &offset)
+	{
+		const RValue<Pointer<Byte>> rewound = lhs - offset;
+
+		return lhs = rewound;
+	}
+
+	// Emits a void return from the function being generated.
+	// Unless _M_AMD64 or _M_X64 is defined, an emms is emitted first.
+	// Afterwards the insertion point moves to a fresh basic block, so code
+	// generated after the return does not follow the terminator.
+	void Return()
+	{
+		#if !defined(_M_AMD64) && !defined(_M_X64)
+			x86::emms();
+		#endif
+
+		Nucleus::createRetVoid();
+
+		BasicBlock *afterReturn = Nucleus::createBasicBlock();
+		Nucleus::setInsertBlock(afterReturn);
+	}
+
+	// Emits a return of the current value of the variable ret.
+	// Unless _M_AMD64 or _M_X64 is defined, an emms is emitted first.
+	// Afterwards the insertion point moves to a fresh basic block, so code
+	// generated after the return does not follow the terminator.
+	void Return(const Int &ret)
+	{
+		#if !defined(_M_AMD64) && !defined(_M_X64)
+			x86::emms();
+		#endif
+
+		Value *result = Nucleus::createLoad(ret.address);
+		Nucleus::createRet(result);
+
+		BasicBlock *afterReturn = Nucleus::createBasicBlock();
+		Nucleus::setInsertBlock(afterReturn);
+	}
+
+	// Starts a loop: creates a new basic block, branches to it from the
+	// current block and makes it the insertion point. Returns the new block.
+	BasicBlock *beginLoop()
+	{
+		BasicBlock *header = Nucleus::createBasicBlock();
+
+		Nucleus::createBr(header);
+		Nucleus::getBuilder()->SetInsertPoint(header);
+
+		return header;
+	}
+
+	// Emits a conditional branch on cmp to bodyBB (true) or endBB (false) and
+	// continues code generation in bodyBB. Always returns true.
+	bool branch(const RValue<Bool> &cmp, BasicBlock *bodyBB, BasicBlock *endBB)
+	{
+		Value *condition = cmp.value;
+
+		Nucleus::createCondBr(condition, bodyBB, endBB);
+		Nucleus::getBuilder()->SetInsertPoint(bodyBB);
+
+		return true;
+	}
+
+	// Reopens falseBB for code generation: removes its last instruction and
+	// makes it the insertion point. Always returns true.
+	// NOTE(review): the removed instruction is presumably a placeholder
+	// terminator added when the block was first closed - confirm against callers.
+	bool elseBlock(BasicBlock *falseBB)
+	{
+		falseBB->back().eraseFromParent();
+
+		Nucleus::getBuilder()->SetInsertPoint(falseBB);
+
+		return true;
+	}
+
+	// Emits a call to the readcyclecounter intrinsic and returns its result.
+	RValue<Long> Ticks()
+	{
+		llvm::Function *cycleCounter = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::readcyclecounter);
+		Value *ticks = Nucleus::createCall(cycleCounter);
+
+		return RValue<Long>(ticks);
+	}
+
+	// Public wrapper that forwards to x86::emms().
+	void Emms()
+	{
+		x86::emms();
+	}
+}
+
+namespace sw
+{
+	namespace x86
+	{
+		// Converts a scalar float to an integer via the x86_sse_cvtss2si intrinsic.
+		// The intrinsic takes a vector operand, so val is placed in the x
+		// component of a temporary Float4 whose other components are left unset.
+		RValue<Int> cvtss2si(const RValue<Float> &val)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_cvtss2si);
+
+			Float4 operand;
+			operand.x = val;
+
+			return RValue<Int>(Nucleus::createCall(intrinsic, RValue<Float4>(operand).value));
+		}
+
+		// Emits a call to the x86_sse_cvtps2pi intrinsic on val; the result is an Int2.
+		RValue<Int2> cvtps2pi(const RValue<Float4> &val)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_cvtps2pi);
+			Value *converted = Nucleus::createCall(intrinsic, val.value);
+
+			return RValue<Int2>(converted);
+		}
+
+		// Emits a call to the x86_sse_cvttps2pi intrinsic on val; the result is an Int2.
+		RValue<Int2> cvttps2pi(const RValue<Float4> &val)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_cvttps2pi);
+			Value *converted = Nucleus::createCall(intrinsic, val.value);
+
+			return RValue<Int2>(converted);
+		}
+
+		// Converts four floats to four integers.
+		// Uses the x86_sse2_cvtps2dq intrinsic when SSE2 is available; otherwise
+		// converts the vector in two halves with cvtps2pi and concatenates them.
+		RValue<Int4> cvtps2dq(const RValue<Float4> &val)
+		{
+			if(!CPUID::supportsSSE2())
+			{
+				// Swizzle 0xEE presumably moves the upper two components into the lower half
+				Int2 lo = x86::cvtps2pi(val);
+				Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
+
+				return Concatenate(lo, hi);
+			}
+
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_cvtps2dq);
+
+			return RValue<Int4>(Nucleus::createCall(intrinsic, val.value));
+		}
+
+		// Converts four floats to four integers using the "cvtt" conversion variant.
+		// Uses the x86_sse2_cvttps2dq intrinsic when SSE2 is available; otherwise
+		// converts the vector in two halves with cvttps2pi and concatenates them.
+		RValue<Int4> cvttps2dq(const RValue<Float4> &val)
+		{
+			if(!CPUID::supportsSSE2())
+			{
+				// Swizzle 0xEE presumably moves the upper two components into the lower half
+				Int2 lo = x86::cvttps2pi(val);
+				Int2 hi = x86::cvttps2pi(Swizzle(val, 0xEE));
+
+				return Concatenate(lo, hi);
+			}
+
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_cvttps2dq);
+
+			return RValue<Int4>(Nucleus::createCall(intrinsic, val.value));
+		}
+
+		// Emits a call to the x86_sse_cvtpi2ps intrinsic with the Float4 operand x
+		// and the Int2 operand y.
+		RValue<Float4> cvtpi2ps(const RValue<Float4> &x, const RValue<Int2> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_cvtpi2ps);
+			Value *converted = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Float4>(converted);
+		}
+
+		// Converts four integers to four floats.
+		// Uses the x86_sse2_cvtdq2ps intrinsic when SSE2 is available. Otherwise
+		// each half is converted with cvtpi2ps into a scratch vector, and the
+		// xy parts of the two results are combined into the final Float4.
+		RValue<Float4> cvtdq2ps(const RValue<Int4> &val)
+		{
+			if(!CPUID::supportsSSE2())
+			{
+				Int2 lo = Int2(val);
+				Int2 hi = Int2(Swizzle(val, 0xEE));
+
+				// Only serve as the first cvtpi2ps operand; never assigned a value here
+				Float4 scratch1;
+				Float4 scratch2;
+
+				return Float4(Float4(x86::cvtpi2ps(scratch1, lo)).xy, Float4(x86::cvtpi2ps(scratch2, hi)).xy);
+			}
+
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_cvtdq2ps);
+
+			return RValue<Float4>(Nucleus::createCall(intrinsic, val.value));
+		}
+
+		// Scalar wrapper for the x86_sse_rcp_ss intrinsic.
+		// val is inserted into element 0 of an undefined Float4, the intrinsic
+		// is called on it, and element 0 of the result is returned.
+		RValue<Float> rcpss(const RValue<Float> &val)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_rcp_ss);
+
+			Value *undef = UndefValue::get(Float4::getType());
+			Value *operand = Nucleus::createInsertElement(undef, val.value, 0);
+			Value *result = Nucleus::createCall(intrinsic, operand);
+
+			return RValue<Float>(Nucleus::createExtractElement(result, 0));
+		}
+
+		// Scalar wrapper for the x86_sse_sqrt_ss intrinsic.
+		// val is inserted into element 0 of an undefined Float4, the intrinsic
+		// is called on it, and element 0 of the result is returned.
+		RValue<Float> sqrtss(const RValue<Float> &val)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_sqrt_ss);
+
+			Value *undef = UndefValue::get(Float4::getType());
+			Value *operand = Nucleus::createInsertElement(undef, val.value, 0);
+			Value *result = Nucleus::createCall(intrinsic, operand);
+
+			return RValue<Float>(Nucleus::createExtractElement(result, 0));
+		}
+
+		// Scalar wrapper for the x86_sse_rsqrt_ss intrinsic.
+		// val is inserted into element 0 of an undefined Float4, the intrinsic
+		// is called on it, and element 0 of the result is returned.
+		RValue<Float> rsqrtss(const RValue<Float> &val)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_rsqrt_ss);
+
+			Value *undef = UndefValue::get(Float4::getType());
+			Value *operand = Nucleus::createInsertElement(undef, val.value, 0);
+			Value *result = Nucleus::createCall(intrinsic, operand);
+
+			return RValue<Float>(Nucleus::createExtractElement(result, 0));
+		}
+
+		// Emits a call to the x86_sse_rcp_ps intrinsic on val.
+		RValue<Float4> rcpps(const RValue<Float4> &val)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_rcp_ps);
+			Value *result = Nucleus::createCall(intrinsic, val.value);
+
+			return RValue<Float4>(result);
+		}
+
+		// Emits a call to the x86_sse_sqrt_ps intrinsic on val.
+		RValue<Float4> sqrtps(const RValue<Float4> &val)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_sqrt_ps);
+			Value *result = Nucleus::createCall(intrinsic, val.value);
+
+			return RValue<Float4>(result);
+		}
+
+		// Emits a call to the x86_sse_rsqrt_ps intrinsic on val.
+		RValue<Float4> rsqrtps(const RValue<Float4> &val)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_rsqrt_ps);
+			Value *result = Nucleus::createCall(intrinsic, val.value);
+
+			return RValue<Float4>(result);
+		}
+
+		// Emits a call to the x86_sse_max_ps intrinsic with operands x and y.
+		RValue<Float4> maxps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_max_ps);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Float4>(result);
+		}
+
+		// Emits a call to the x86_sse_min_ps intrinsic with operands x and y.
+		RValue<Float4> minps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_min_ps);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Float4>(result);
+		}
+
+		// Scalar wrapper for the x86_sse41_round_ss intrinsic with rounding
+		// control imm. The first operand is an undefined Float4, the second
+		// carries val in element 0; element 0 of the result is returned.
+		RValue<Float> roundss(const RValue<Float> &val, unsigned char imm)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse41_round_ss);
+
+			Value *passThrough = UndefValue::get(Float4::getType());
+			Value *operand = Nucleus::createInsertElement(passThrough, val.value, 0);
+			Value *rounded = Nucleus::createCall(intrinsic, passThrough, operand, Nucleus::createConstantInt(imm));
+
+			return RValue<Float>(Nucleus::createExtractElement(rounded, 0));
+		}
+
+		// Scalar floor: roundss with rounding control 1.
+		RValue<Float> floorss(const RValue<Float> &val)
+		{
+			const unsigned char roundDown = 1;
+
+			return roundss(val, roundDown);
+		}
+
+		// Scalar ceiling: roundss with rounding control 2.
+		RValue<Float> ceilss(const RValue<Float> &val)
+		{
+			const unsigned char roundUp = 2;
+
+			return roundss(val, roundUp);
+		}
+
+		// Emits a call to the x86_sse41_round_ps intrinsic on val with rounding control imm.
+		RValue<Float4> roundps(const RValue<Float4> &val, unsigned char imm)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse41_round_ps);
+			Value *rounded = Nucleus::createCall(intrinsic, val.value, Nucleus::createConstantInt(imm));
+
+			return RValue<Float4>(rounded);
+		}
+
+		// Packed floor: roundps with rounding control 1.
+		RValue<Float4> floorps(const RValue<Float4> &val)
+		{
+			const unsigned char roundDown = 1;
+
+			return roundps(val, roundDown);
+		}
+
+		// Packed ceiling: roundps with rounding control 2.
+		RValue<Float4> ceilps(const RValue<Float4> &val)
+		{
+			const unsigned char roundUp = 2;
+
+			return roundps(val, roundUp);
+		}
+
+		// Emits a call to the x86_sse_cmp_ps intrinsic comparing x with y; imm
+		// selects the comparison predicate and is passed as a byte constant.
+		RValue<Float4> cmpps(const RValue<Float4> &x, const RValue<Float4> &y, unsigned char imm)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_cmp_ps);
+			Value *mask = Nucleus::createCall(intrinsic, x.value, y.value, Nucleus::createConstantByte(imm));
+
+			return RValue<Float4>(mask);
+		}
+
+		// Packed compare "equal": cmpps with predicate 0.
+		RValue<Float4> cmpeqps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			const unsigned char predicate = 0;
+
+			return cmpps(x, y, predicate);
+		}
+
+		// Packed compare "less than": cmpps with predicate 1.
+		RValue<Float4> cmpltps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			const unsigned char predicate = 1;
+
+			return cmpps(x, y, predicate);
+		}
+
+		// Packed compare "less or equal": cmpps with predicate 2.
+		RValue<Float4> cmpleps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			const unsigned char predicate = 2;
+
+			return cmpps(x, y, predicate);
+		}
+
+		// Packed compare "unordered": cmpps with predicate 3.
+		RValue<Float4> cmpunordps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			const unsigned char predicate = 3;
+
+			return cmpps(x, y, predicate);
+		}
+
+		// Packed compare "not equal": cmpps with predicate 4.
+		RValue<Float4> cmpneqps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			const unsigned char predicate = 4;
+
+			return cmpps(x, y, predicate);
+		}
+
+		// Packed compare "not less than": cmpps with predicate 5.
+		RValue<Float4> cmpnltps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			const unsigned char predicate = 5;
+
+			return cmpps(x, y, predicate);
+		}
+
+		// Packed compare "not less or equal": cmpps with predicate 6.
+		RValue<Float4> cmpnleps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			const unsigned char predicate = 6;
+
+			return cmpps(x, y, predicate);
+		}
+
+		// Packed compare "ordered": cmpps with predicate 7.
+		RValue<Float4> cmpordps(const RValue<Float4> &x, const RValue<Float4> &y)
+		{
+			const unsigned char predicate = 7;
+
+			return cmpps(x, y, predicate);
+		}
+
+		// Scalar wrapper for the x86_sse_cmp_ss intrinsic; imm selects the predicate.
+		// x and y are each inserted into element 0 of an undefined Float4, and
+		// element 0 of the intrinsic's result is returned.
+		RValue<Float> cmpss(const RValue<Float> &x, const RValue<Float> &y, unsigned char imm)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_cmp_ss);
+
+			Value *left = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), x.value, 0);
+			Value *right = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), y.value, 0);
+			Value *mask = Nucleus::createCall(intrinsic, left, right, Nucleus::createConstantByte(imm));
+
+			return RValue<Float>(Nucleus::createExtractElement(mask, 0));
+		}
+
+		// Scalar compare "equal": cmpss with predicate 0.
+		RValue<Float> cmpeqss(const RValue<Float> &x, const RValue<Float> &y)
+		{
+			const unsigned char predicate = 0;
+
+			return cmpss(x, y, predicate);
+		}
+
+		// Scalar compare "less than": cmpss with predicate 1.
+		RValue<Float> cmpltss(const RValue<Float> &x, const RValue<Float> &y)
+		{
+			const unsigned char predicate = 1;
+
+			return cmpss(x, y, predicate);
+		}
+
+		// Scalar compare "less or equal": cmpss with predicate 2.
+		RValue<Float> cmpless(const RValue<Float> &x, const RValue<Float> &y)
+		{
+			const unsigned char predicate = 2;
+
+			return cmpss(x, y, predicate);
+		}
+
+		// Scalar compare "unordered": cmpss with predicate 3.
+		RValue<Float> cmpunordss(const RValue<Float> &x, const RValue<Float> &y)
+		{
+			const unsigned char predicate = 3;
+
+			return cmpss(x, y, predicate);
+		}
+
+		// Scalar compare "not equal": cmpss with predicate 4.
+		RValue<Float> cmpneqss(const RValue<Float> &x, const RValue<Float> &y)
+		{
+			const unsigned char predicate = 4;
+
+			return cmpss(x, y, predicate);
+		}
+
+		// Scalar compare "not less than": cmpss with predicate 5.
+		RValue<Float> cmpnltss(const RValue<Float> &x, const RValue<Float> &y)
+		{
+			const unsigned char predicate = 5;
+
+			return cmpss(x, y, predicate);
+		}
+
+		// Scalar compare "not less or equal": cmpss with predicate 6.
+		RValue<Float> cmpnless(const RValue<Float> &x, const RValue<Float> &y)
+		{
+			const unsigned char predicate = 6;
+
+			return cmpss(x, y, predicate);
+		}
+
+		// Scalar compare "ordered": cmpss with predicate 7.
+		RValue<Float> cmpordss(const RValue<Float> &x, const RValue<Float> &y)
+		{
+			const unsigned char predicate = 7;
+
+			return cmpss(x, y, predicate);
+		}
+
+		// Emits a call to the x86_ssse3_pabs_d_128 intrinsic (PABSD) on x.
+		//
+		// PABSD takes a single source operand, so only x is forwarded to the
+		// intrinsic. Passing y as a second argument, as was done previously,
+		// does not match the intrinsic's signature. The y parameter is kept
+		// solely so that existing callers keep compiling; it is ignored.
+		RValue<Int4> pabsd(const RValue<Int4> &x, const RValue<Int4> &y)
+		{
+			(void)y;   // Unused: the intrinsic is unary
+
+			Module *module = Nucleus::getModule();
+			llvm::Function *pabsd = Intrinsic::getDeclaration(module, Intrinsic::x86_ssse3_pabs_d_128);
+
+			return RValue<Int4>(Nucleus::createCall(pabsd, x.value));
+		}
+
+		// Emits a call to the x86_mmx_padds_w intrinsic with operands x and y.
+		RValue<Short4> paddsw(const RValue<Short4> &x, const RValue<Short4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_padds_w);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(result);
+		}
+		
+		// Emits a call to the x86_mmx_psubs_w intrinsic with operands x and y.
+		RValue<Short4> psubsw(const RValue<Short4> &x, const RValue<Short4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psubs_w);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(result);
+		}
+
+		// Emits a call to the x86_mmx_paddus_w intrinsic with operands x and y.
+		RValue<UShort4> paddusw(const RValue<UShort4> &x, const RValue<UShort4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_paddus_w);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<UShort4>(result);
+		}
+		
+		// Emits a call to the x86_mmx_psubus_w intrinsic with operands x and y.
+		RValue<UShort4> psubusw(const RValue<UShort4> &x, const RValue<UShort4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psubus_w);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<UShort4>(result);
+		}
+
+		// Emits a call to the x86_mmx_padds_b intrinsic with operands x and y.
+		RValue<SByte8> paddsb(const RValue<SByte8> &x, const RValue<SByte8> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_padds_b);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<SByte8>(result);
+		}
+		
+		// Emits a call to the x86_mmx_psubs_b intrinsic with operands x and y.
+		RValue<SByte8> psubsb(const RValue<SByte8> &x, const RValue<SByte8> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psubs_b);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<SByte8>(result);
+		}
+		
+		// Emits a call to the x86_mmx_paddus_b intrinsic with operands x and y.
+		RValue<Byte8> paddusb(const RValue<Byte8> &x, const RValue<Byte8> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_paddus_b);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Byte8>(result);
+		}
+		
+		// Emits a call to the x86_mmx_psubus_b intrinsic with operands x and y.
+		RValue<Byte8> psubusb(const RValue<Byte8> &x, const RValue<Byte8> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psubus_b);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Byte8>(result);
+		}
+
+		// Emits a call to the x86_mmx_pavg_w intrinsic with operands x and y.
+		RValue<UShort4> pavgw(const RValue<UShort4> &x, const RValue<UShort4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pavg_w);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<UShort4>(result);
+		}
+
+		// Emits a call to the x86_mmx_pmaxs_w intrinsic with operands x and y.
+		RValue<Short4> pmaxsw(const RValue<Short4> &x, const RValue<Short4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pmaxs_w);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(result);
+		}
+
+		// Emits a call to the x86_mmx_pmins_w intrinsic with operands x and y.
+		RValue<Short4> pminsw(const RValue<Short4> &x, const RValue<Short4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pmins_w);
+			Value *result = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(result);
+		}
+
+		// Emits a call to the x86_mmx_pcmpgt_w intrinsic with operands x and y.
+		RValue<Short4> pcmpgtw(const RValue<Short4> &x, const RValue<Short4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pcmpgt_w);
+			Value *mask = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(mask);
+		}
+
+		// Emits a call to the x86_mmx_pcmpeq_w intrinsic with operands x and y.
+		RValue<Short4> pcmpeqw(const RValue<Short4> &x, const RValue<Short4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pcmpeq_w);
+			Value *mask = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(mask);
+		}
+
+		// Emits a call to the x86_mmx_pcmpgt_b intrinsic with the signed byte
+		// operands x and y; the result is returned as a Byte8.
+		RValue<Byte8> pcmpgtb(const RValue<SByte8> &x, const RValue<SByte8> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pcmpgt_b);
+			Value *mask = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Byte8>(mask);
+		}
+
+		// Emits a call to the x86_mmx_pcmpeq_b intrinsic with operands x and y.
+		RValue<Byte8> pcmpeqb(const RValue<Byte8> &x, const RValue<Byte8> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pcmpeq_b);
+			Value *mask = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Byte8>(mask);
+		}
+
+		// Emits a call to the x86_mmx_packssdw intrinsic, packing the Int2
+		// operands x and y into a Short4.
+		RValue<Short4> packssdw(const RValue<Int2> &x, const RValue<Int2> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_packssdw);
+			Value *packed = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(packed);
+		}
+
+		// Packs the Int4 operands x and y into a Short8.
+		// Uses the x86_sse2_packssdw_128 intrinsic when SSE2 is available.
+		// Otherwise each operand is split into two Int2 halves, the halves of x
+		// and of y are packed with the MMX packssdw, and the two Short4 results
+		// are concatenated (x first).
+		RValue<Short8> packssdw(const RValue<Int4> &x, const RValue<Int4> &y)
+		{
+			if(!CPUID::supportsSSE2())
+			{
+				Int2 loX = Int2(x);
+				Int2 hiX = Int2(Swizzle(x, 0xEE));
+
+				Int2 loY = Int2(y);
+				Int2 hiY = Int2(Swizzle(y, 0xEE));
+
+				Short4 lo = x86::packssdw(loX, hiX);
+				Short4 hi = x86::packssdw(loY, hiY);
+
+				return Concatenate(lo, hi);
+			}
+
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_packssdw_128);
+
+			return RValue<Short8>(Nucleus::createCall(intrinsic, x.value, y.value));
+		}
+
+		// Emits a call to the x86_mmx_packsswb intrinsic, packing the Short4
+		// operands x and y into an SByte8.
+		RValue<SByte8> packsswb(const RValue<Short4> &x, const RValue<Short4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_packsswb);
+			Value *packed = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<SByte8>(packed);
+		}
+
+		// Emits a call to the x86_mmx_packuswb intrinsic, packing the UShort4
+		// operands x and y into a Byte8.
+		RValue<Byte8> packuswb(const RValue<UShort4> &x, const RValue<UShort4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_packuswb);
+			Value *packed = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Byte8>(packed);
+		}
+
+		// Packs the UInt4 operands x and y into a UShort8.
+		// With SSE4.1 this emits the x86_sse41_packusdw intrinsic directly.
+		// Without it, an approximation is built from the signed pack: both
+		// inputs are offset by 0x8000, packed with packssdw, and 0x8000 is
+		// added back to every 16-bit lane of the result.
+		RValue<UShort8> packusdw(const RValue<UInt4> &x, const RValue<UInt4> &y)
+		{
+			if(CPUID::supportsSSE4_1())
+			{
+				Module *module = Nucleus::getModule();
+				llvm::Function *packusdw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_packusdw);
+	
+				return RValue<UShort8>(Nucleus::createCall(packusdw, x.value, y.value));
+			}
+			else
+			{
+				// FIXME: Not an exact replacement!
+				// Bias into signed range, saturate-pack as signed, then undo the bias
+				return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000));
+			}
+		}
+
+		// Emits a call to the x86_mmx_psrli_w intrinsic, shifting x by the
+		// immediate count y.
+		RValue<UShort4> psrlw(const RValue<UShort4> &x, unsigned char y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psrli_w);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y));
+
+			return RValue<UShort4>(shifted);
+		}
+
+		// Emits a call to the x86_sse2_psrli_w intrinsic, shifting x by the
+		// immediate count y.
+		RValue<UShort8> psrlw(const RValue<UShort8> &x, unsigned char y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_psrli_w);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y));
+
+			return RValue<UShort8>(shifted);
+		}
+
+		// Emits a call to the x86_mmx_psrai_w intrinsic, shifting x by the
+		// immediate count y.
+		RValue<Short4> psraw(const RValue<Short4> &x, unsigned char y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psrai_w);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y));
+
+			return RValue<Short4>(shifted);
+		}
+
+		// Emits a call to the x86_sse2_psrai_w intrinsic, shifting x by the
+		// immediate count y.
+		RValue<Short8> psraw(const RValue<Short8> &x, unsigned char y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_psrai_w);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y));
+
+			return RValue<Short8>(shifted);
+		}
+
+		// Emits a call to the x86_mmx_pslli_w intrinsic, shifting x by the
+		// immediate count y.
+		RValue<Short4> psllw(const RValue<Short4> &x, unsigned char y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pslli_w);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y));
+
+			return RValue<Short4>(shifted);
+		}
+
+		// Emits a call to the x86_sse2_pslli_w intrinsic, shifting x by the
+		// immediate count y.
+		RValue<Short8> psllw(const RValue<Short8> &x, unsigned char y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_pslli_w);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y));
+
+			return RValue<Short8>(shifted);
+		}
+
+		// Emits a call to the x86_mmx_pslli_d intrinsic, shifting x by the
+		// immediate count y.
+		RValue<Int2> pslld(const RValue<Int2> &x, unsigned char y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pslli_d);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y));
+
+			return RValue<Int2>(shifted);
+		}
+
+		// Shifts each 32-bit element of x left by the immediate count y.
+		// Uses the x86_sse2_pslli_d intrinsic when SSE2 is available; otherwise
+		// the vector is split into two Int2 halves that are shifted with the
+		// MMX pslld and concatenated again.
+		RValue<Int4> pslld(const RValue<Int4> &x, unsigned char y)
+		{
+			if(!CPUID::supportsSSE2())
+			{
+				Int2 lo = Int2(x);
+				Int2 hi = Int2(Swizzle(x, 0xEE));
+
+				lo = x86::pslld(lo, y);
+				hi = x86::pslld(hi, y);
+
+				return Concatenate(lo, hi);
+			}
+
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_pslli_d);
+
+			return RValue<Int4>(Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y)));
+		}
+
+		// Emits a call to the x86_mmx_psrai_d intrinsic, shifting x by the
+		// immediate count y.
+		RValue<Int2> psrad(const RValue<Int2> &x, unsigned char y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psrai_d);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y));
+
+			return RValue<Int2>(shifted);
+		}
+
+		// Arithmetic right shift of each 32-bit element of x by the immediate count y.
+		// Uses the x86_sse2_psrai_d intrinsic when SSE2 is available; otherwise
+		// the vector is split into two Int2 halves that are shifted with the
+		// MMX psrad and concatenated again.
+		RValue<Int4> psrad(const RValue<Int4> &x, unsigned char y)
+		{
+			if(!CPUID::supportsSSE2())
+			{
+				Int2 lo = Int2(x);
+				Int2 hi = Int2(Swizzle(x, 0xEE));
+
+				lo = x86::psrad(lo, y);
+				hi = x86::psrad(hi, y);
+
+				return Concatenate(lo, hi);
+			}
+
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_psrai_d);
+
+			return RValue<Int4>(Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y)));
+		}
+
+		// Emits a call to the x86_mmx_psrli_d intrinsic, shifting x by the
+		// immediate count y.
+		RValue<UInt2> psrld(const RValue<UInt2> &x, unsigned char y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psrli_d);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y));
+
+			return RValue<UInt2>(shifted);
+		}
+
+		// Logical right shift of each 32-bit element of x by the immediate count y.
+		// Uses the x86_sse2_psrli_d intrinsic when SSE2 is available; otherwise
+		// the vector is reinterpreted as Int4, split into two halves that are
+		// shifted with the MMX psrld, and concatenated again.
+		RValue<UInt4> psrld(const RValue<UInt4> &x, unsigned char y)
+		{
+			if(!CPUID::supportsSSE2())
+			{
+				UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
+				UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
+
+				lo = x86::psrld(lo, y);
+				hi = x86::psrld(hi, y);
+
+				return Concatenate(lo, hi);
+			}
+
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_psrli_d);
+
+			return RValue<UInt4>(Nucleus::createCall(intrinsic, x.value, Nucleus::createConstantInt(y)));
+		}
+
+		// Emits a call to the x86_mmx_psrl_w intrinsic, shifting x by the
+		// run-time count held in y.
+		RValue<UShort4> psrlw(const RValue<UShort4> &x, const RValue<Long1> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psrl_w);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<UShort4>(shifted);
+		}
+
+		// Emits a call to the x86_mmx_psra_w intrinsic, shifting x by the
+		// run-time count held in y.
+		RValue<Short4> psraw(const RValue<Short4> &x, const RValue<Long1> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psra_w);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(shifted);
+		}
+
+		// Emits a call to the x86_mmx_psll_w intrinsic, shifting x by the
+		// run-time count held in y.
+		RValue<Short4> psllw(const RValue<Short4> &x, const RValue<Long1> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psll_w);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(shifted);
+		}
+
+		// Emits a call to the x86_mmx_psll_d intrinsic, shifting x by the
+		// run-time count held in y.
+		RValue<Int2> pslld(const RValue<Int2> &x, const RValue<Long1> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psll_d);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Int2>(shifted);
+		}
+
+		// Emits a call to the x86_mmx_psrl_d intrinsic, shifting x by the
+		// run-time count held in y.
+		RValue<UInt2> psrld(const RValue<UInt2> &x, const RValue<Long1> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psrl_d);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<UInt2>(shifted);
+		}
+
+		// Emits a call to the x86_mmx_psra_d intrinsic, shifting x by the
+		// run-time count held in y.
+		RValue<Int2> psrad(const RValue<Int2> &x, const RValue<Long1> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_psra_d);
+			Value *shifted = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Int2>(shifted);
+		}
+
+		// Emits a call to the x86_mmx_pmulh_w intrinsic with operands x and y.
+		RValue<Short4> pmulhw(const RValue<Short4> &x, const RValue<Short4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pmulh_w);
+			Value *product = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short4>(product);
+		}
+
+		// Emits a call to the x86_mmx_pmulhu_w intrinsic with operands x and y.
+		RValue<UShort4> pmulhuw(const RValue<UShort4> &x, const RValue<UShort4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pmulhu_w);
+			Value *product = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<UShort4>(product);
+		}
+
+		// Emits a call to the x86_mmx_pmadd_wd intrinsic with the Short4
+		// operands x and y; the result is an Int2.
+		RValue<Int2> pmaddwd(const RValue<Short4> &x, const RValue<Short4> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pmadd_wd);
+			Value *sums = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Int2>(sums);
+		}
+
+		// Emits a call to the x86_sse2_pmulh_w intrinsic with operands x and y.
+		RValue<Short8> pmulhw(const RValue<Short8> &x, const RValue<Short8> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_pmulh_w);
+			Value *product = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Short8>(product);
+		}
+
+		// Emits a call to the x86_sse2_pmulhu_w intrinsic with operands x and y.
+		RValue<UShort8> pmulhuw(const RValue<UShort8> &x, const RValue<UShort8> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_pmulhu_w);
+			Value *product = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<UShort8>(product);
+		}
+
+		// Emits a call to the x86_sse2_pmadd_wd intrinsic with the Short8
+		// operands x and y; the result is an Int4.
+		RValue<Int4> pmaddwd(const RValue<Short8> &x, const RValue<Short8> &y)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse2_pmadd_wd);
+			Value *sums = Nucleus::createCall(intrinsic, x.value, y.value);
+
+			return RValue<Int4>(sums);
+		}
+
+		// Emits a call to LLVM's x86_sse_movmsk_ps intrinsic on x (x86 MOVMSKPS: gathers the
+		// sign bit of each of the four floats into the low bits of an integer); returns it as Int.
+		RValue<Int> movmskps(const RValue<Float4> &x)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse_movmsk_ps);
+			return RValue<Int>(Nucleus::createCall(intrinsic, x.value));
+		}
+
+		// Emits a call to LLVM's x86_mmx_pmovmskb intrinsic on x (x86 PMOVMSKB: gathers the
+		// most significant bit of each byte into the low bits of an integer); returns it as Int.
+		RValue<Int> pmovmskb(const RValue<Byte8> &x)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_pmovmskb);
+			return RValue<Int>(Nucleus::createCall(intrinsic, x.value));
+		}
+
+		//RValue<Int2> movd(const RValue<Pointer<Int>> &x)
+		//{
+		//	Value *element = Nucleus::createLoad(x.value);
+
+		////	Value *int2 = UndefValue::get(Int2::getType());
+		////	int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0));
+
+		//	Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
+
+		//	return RValue<Int2>(int2);
+		//}
+
+		//RValue<Int2> movdq2q(const RValue<Int4> &x)
+		//{
+		//	Value *long2 = Nucleus::createBitCast(x.value, Long2::getType());
+		//	Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0));
+
+		//	return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
+		//}
+
+		// Bitcasts x to Byte16, then emits a call to LLVM's x86_sse41_pmovzxbd intrinsic
+		// (x86 PMOVZXBD: zero-extends the four low bytes to 32-bit lanes); returns it as Int4.
+		RValue<Int4> pmovzxbd(const RValue<Int4> &x)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse41_pmovzxbd);
+			return RValue<Int4>(Nucleus::createCall(intrinsic, Nucleus::createBitCast(x.value, Byte16::getType())));
+		}
+
+		// Bitcasts x to SByte16, then emits a call to LLVM's x86_sse41_pmovsxbd intrinsic
+		// (x86 PMOVSXBD: sign-extends the four low bytes to 32-bit lanes); returns it as Int4.
+		RValue<Int4> pmovsxbd(const RValue<Int4> &x)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse41_pmovsxbd);
+			return RValue<Int4>(Nucleus::createCall(intrinsic, Nucleus::createBitCast(x.value, SByte16::getType())));
+		}
+
+		// Bitcasts x to UShort8, then emits a call to LLVM's x86_sse41_pmovzxwd intrinsic
+		// (x86 PMOVZXWD: zero-extends the four low 16-bit words to 32-bit lanes); returns Int4.
+		RValue<Int4> pmovzxwd(const RValue<Int4> &x)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse41_pmovzxwd);
+			return RValue<Int4>(Nucleus::createCall(intrinsic, Nucleus::createBitCast(x.value, UShort8::getType())));
+		}
+
+		// Bitcasts x to Short8, then emits a call to LLVM's x86_sse41_pmovsxwd intrinsic
+		// (x86 PMOVSXWD: sign-extends the four low 16-bit words to 32-bit lanes); returns Int4.
+		RValue<Int4> pmovsxwd(const RValue<Int4> &x)
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_sse41_pmovsxwd);
+			return RValue<Int4>(Nucleus::createCall(intrinsic, Nucleus::createBitCast(x.value, Short8::getType())));
+		}
+
+		// Emits a call to LLVM's x86_mmx_emms intrinsic (x86 EMMS: empties the MMX state).
+		// Takes no operands; the value returned by createCall is discarded.
+		void emms()
+		{
+			llvm::Function *intrinsic = Intrinsic::getDeclaration(Nucleus::getModule(), Intrinsic::x86_mmx_emms);
+			Nucleus::createCall(intrinsic);
+		}
+	}
+}