api/_invocation_8cpp_source.html

 // Copyright (c) 2018 the Talvos developers. All rights reserved.

 //

 // This file is distributed under a three-clause BSD license. For full license

 // terms please see the LICENSE file distributed with this source code.


 #include <array>

 #include <cassert>

 #include <cmath>

 #include <iostream>

 #include <sstream>


 #include <spirv/unified1/GLSL.std.450.h>

 #include <spirv/unified1/spirv.h>


 #include "talvos/Block.h"

 #include "talvos/Device.h"

 #include "talvos/EntryPoint.h"

 #include "talvos/Function.h"

 #include "talvos/Image.h"

 #include "talvos/Instruction.h"

 #include "talvos/Invocation.h"

 #include "talvos/Memory.h"

 #include "talvos/Module.h"

 #include "talvos/PipelineStage.h"

 #include "talvos/Type.h"

 #include "talvos/Variable.h"

 #include "talvos/Workgroup.h"


 // Shorthand for reading an instruction operand as a value: operand number
 // `Index` of `Inst` is used to index `Objects`, and the selected object is
 // read as `Type`. Only usable where both `Inst` and `Objects` are in scope.
 #define OP(Index, Type) Objects[Inst->getOperand(Index)].get<Type>()


 namespace talvos

 {


 // Create an invocation that is not attached to a pipeline stage.
 //
 // Only the object table is populated (copied from InitialObjects); no private
 // or pipeline memory is attached and no current instruction is selected.
 Invocation::Invocation(Device &Dev, const std::vector<Object> &InitialObjects)
     : Dev(Dev)
 {
   CurrentInstruction = nullptr;
   PrivateMemory = nullptr;
   PipelineMemory = nullptr;

   // Give the workgroup pointer and the state flags defined values as well, so
   // that nothing can read an indeterminate member from this kind of
   // invocation later on.
   Group = nullptr;
   AtBarrier = false;
   Discarded = false;

   Objects = InitialObjects;
 }


 // Create an invocation that executes the entry point of Stage.
 //
 // The object table starts as a copy of InitialObjects, is then overlaid with
 // the variable objects of Group (when a workgroup is given), and finally gets
 // one pointer object per module variable in the Private storage class, each
 // backed by a fresh allocation in this invocation's private memory.
 // Execution is positioned at the first block of the entry point function and
 // the device is notified that the invocation has begun.
 Invocation::Invocation(Device &Dev, const PipelineStage &Stage,
                        const std::vector<Object> &InitialObjects,
                        std::shared_ptr<Memory> PipelineMemory, Workgroup *Group,
                        Dim3 GlobalId)
     : Dev(Dev), Group(Group), GlobalId(GlobalId), PipelineMemory(PipelineMemory)
 {
   PrivateMemory = new Memory(Dev, MemoryScope::Invocation);

   AtBarrier = false;
   Discarded = false;
   CurrentModule = Stage.getModule();
   CurrentFunction = Stage.getEntryPoint()->getFunction();
   moveToBlock(CurrentFunction->getFirstBlockId());

   // Clone initial object values.
   Objects = InitialObjects;

   // Copy workgroup variable pointer values.
   // Bind each entry by reference: copying it would duplicate the contained
   // object once more before it is copied into the table anyway.
   if (Group)
   {
     for (const auto &V : Group->getVariables())
       Objects[V.first] = V.second;
   }

   // Set up private variables.
   for (const auto &V : CurrentModule->getVariables())
   {
     const Type *Ty = V->getType();
     if (Ty->getStorageClass() != SpvStorageClassPrivate)
       continue;

     // Allocate and initialize variable in private memory.
     uint64_t NumBytes = Ty->getElementType()->getSize();
     uint64_t Address = PrivateMemory->allocate(NumBytes);
     Objects[V->getId()] = Object(Ty, Address);

     // A non-zero initializer ID names the object whose value is stored into
     // the new allocation.
     if (V->getInitializer())
       Objects[V->getInitializer()].store(*PrivateMemory, Address);
   }

   Dev.reportInvocationBegin(this);
 }


 // Release the private memory instance owned by this invocation. The pointer
 // may be null, in which case the delete is a no-op.
 Invocation::~Invocation()
 {
   delete PrivateMemory;
 }


 // Execute a single instruction by forwarding it to the handler method that
 // corresponds to its opcode. Opcodes listed as having no effect are accepted
 // and ignored; any other opcode is reported to the device as an error.
 void Invocation::execute(const talvos::Instruction *Inst)
 {
   const uint16_t OpcodeValue = Inst->getOpcode();
   switch (OpcodeValue)
   {
 // RUN_HANDLER(Op, Func): opcode Op is executed by calling execute<Func>(Inst).
 #define RUN_HANDLER(Op, Func)                                                  \
   case Op:                                                                     \
     execute##Func(Inst);                                                       \
     break
 // NO_EFFECT(Op): opcode Op is accepted but requires no work here.
 #define NO_EFFECT(Op)                                                          \
   case Op:                                                                     \
     break

     RUN_HANDLER(SpvOpAccessChain, AccessChain);
     RUN_HANDLER(SpvOpAll, All);
     RUN_HANDLER(SpvOpAny, Any);
     RUN_HANDLER(SpvOpAtomicAnd, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicCompareExchange, AtomicCompareExchange);
     RUN_HANDLER(SpvOpAtomicExchange, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicIAdd, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicIDecrement, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicIIncrement, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicISub, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicLoad, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicOr, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicSMax, AtomicOp<int32_t>);
     RUN_HANDLER(SpvOpAtomicSMin, AtomicOp<int32_t>);
     RUN_HANDLER(SpvOpAtomicStore, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicUMax, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicUMin, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpAtomicXor, AtomicOp<uint32_t>);
     RUN_HANDLER(SpvOpBitcast, Bitcast);
     RUN_HANDLER(SpvOpBitwiseAnd, BitwiseAnd);
     RUN_HANDLER(SpvOpBitwiseOr, BitwiseOr);
     RUN_HANDLER(SpvOpBitwiseXor, BitwiseXor);
     RUN_HANDLER(SpvOpBranch, Branch);
     RUN_HANDLER(SpvOpBranchConditional, BranchConditional);
     RUN_HANDLER(SpvOpCompositeConstruct, CompositeConstruct);
     RUN_HANDLER(SpvOpCompositeExtract, CompositeExtract);
     RUN_HANDLER(SpvOpCompositeInsert, CompositeInsert);
     RUN_HANDLER(SpvOpControlBarrier, ControlBarrier);
     RUN_HANDLER(SpvOpConvertFToS, ConvertFToS);
     RUN_HANDLER(SpvOpConvertFToU, ConvertFToU);
     RUN_HANDLER(SpvOpConvertSToF, ConvertSToF);
     RUN_HANDLER(SpvOpConvertUToF, ConvertUToF);
     RUN_HANDLER(SpvOpCopyMemory, CopyMemory);
     RUN_HANDLER(SpvOpCopyObject, CopyObject);
     RUN_HANDLER(SpvOpDot, Dot);
     RUN_HANDLER(SpvOpExtInst, ExtInst);
     RUN_HANDLER(SpvOpFAdd, FAdd);
     RUN_HANDLER(SpvOpFConvert, FConvert);
     RUN_HANDLER(SpvOpFDiv, FDiv);
     RUN_HANDLER(SpvOpFMod, FMod);
     RUN_HANDLER(SpvOpFMul, FMul);
     RUN_HANDLER(SpvOpFNegate, FNegate);
     RUN_HANDLER(SpvOpFOrdEqual, FOrdEqual);
     RUN_HANDLER(SpvOpFOrdGreaterThan, FOrdGreaterThan);
     RUN_HANDLER(SpvOpFOrdGreaterThanEqual, FOrdGreaterThanEqual);
     RUN_HANDLER(SpvOpFOrdLessThan, FOrdLessThan);
     RUN_HANDLER(SpvOpFOrdLessThanEqual, FOrdLessThanEqual);
     RUN_HANDLER(SpvOpFOrdNotEqual, FOrdNotEqual);
     RUN_HANDLER(SpvOpFRem, FRem);
     RUN_HANDLER(SpvOpFSub, FSub);
     RUN_HANDLER(SpvOpFunctionCall, FunctionCall);
     RUN_HANDLER(SpvOpFUnordEqual, FUnordEqual);
     RUN_HANDLER(SpvOpFUnordGreaterThan, FUnordGreaterThan);
     RUN_HANDLER(SpvOpFUnordGreaterThanEqual, FUnordGreaterThanEqual);
     RUN_HANDLER(SpvOpFUnordLessThan, FUnordLessThan);
     RUN_HANDLER(SpvOpFUnordLessThanEqual, FUnordLessThanEqual);
     RUN_HANDLER(SpvOpFUnordNotEqual, FUnordNotEqual);
     RUN_HANDLER(SpvOpIAdd, IAdd);
     RUN_HANDLER(SpvOpIEqual, IEqual);
     RUN_HANDLER(SpvOpImage, Image);
     RUN_HANDLER(SpvOpImageFetch, ImageRead);
     RUN_HANDLER(SpvOpImageQuerySize, ImageQuerySize);
     RUN_HANDLER(SpvOpImageQuerySizeLod, ImageQuerySize);
     RUN_HANDLER(SpvOpImageRead, ImageRead);
     RUN_HANDLER(SpvOpImageSampleExplicitLod, ImageSampleExplicitLod);
     RUN_HANDLER(SpvOpImageWrite, ImageWrite);
     RUN_HANDLER(SpvOpIMul, IMul);
     RUN_HANDLER(SpvOpInBoundsAccessChain, AccessChain);
     RUN_HANDLER(SpvOpINotEqual, INotEqual);
     RUN_HANDLER(SpvOpIsInf, IsInf);
     RUN_HANDLER(SpvOpIsNan, IsNan);
     RUN_HANDLER(SpvOpISub, ISub);
     RUN_HANDLER(SpvOpKill, Kill);
     RUN_HANDLER(SpvOpLoad, Load);
     RUN_HANDLER(SpvOpLogicalEqual, LogicalEqual);
     RUN_HANDLER(SpvOpLogicalNotEqual, LogicalNotEqual);
     RUN_HANDLER(SpvOpLogicalOr, LogicalOr);
     RUN_HANDLER(SpvOpLogicalAnd, LogicalAnd);
     RUN_HANDLER(SpvOpLogicalNot, LogicalNot);
     RUN_HANDLER(SpvOpMatrixTimesScalar, MatrixTimesScalar);
     RUN_HANDLER(SpvOpMatrixTimesVector, MatrixTimesVector);
     RUN_HANDLER(SpvOpNot, Not);
     RUN_HANDLER(SpvOpPhi, Phi);
     RUN_HANDLER(SpvOpPtrAccessChain, AccessChain);
     RUN_HANDLER(SpvOpReturn, Return);
     RUN_HANDLER(SpvOpReturnValue, ReturnValue);
     RUN_HANDLER(SpvOpSampledImage, SampledImage);
     RUN_HANDLER(SpvOpSConvert, SConvert);
     RUN_HANDLER(SpvOpSDiv, SDiv);
     RUN_HANDLER(SpvOpSelect, Select);
     RUN_HANDLER(SpvOpSGreaterThan, SGreaterThan);
     RUN_HANDLER(SpvOpSGreaterThanEqual, SGreaterThanEqual);
     RUN_HANDLER(SpvOpShiftLeftLogical, ShiftLeftLogical);
     RUN_HANDLER(SpvOpShiftRightArithmetic, ShiftRightArithmetic);
     RUN_HANDLER(SpvOpShiftRightLogical, ShiftRightLogical);
     RUN_HANDLER(SpvOpSLessThan, SLessThan);
     RUN_HANDLER(SpvOpSLessThanEqual, SLessThanEqual);
     RUN_HANDLER(SpvOpSMod, SMod);
     RUN_HANDLER(SpvOpSNegate, SNegate);
     RUN_HANDLER(SpvOpSRem, SRem);
     RUN_HANDLER(SpvOpStore, Store);
     RUN_HANDLER(SpvOpSwitch, Switch);
     RUN_HANDLER(SpvOpUConvert, UConvert);
     RUN_HANDLER(SpvOpUDiv, UDiv);
     RUN_HANDLER(SpvOpUGreaterThan, UGreaterThan);
     RUN_HANDLER(SpvOpUGreaterThanEqual, UGreaterThanEqual);
     RUN_HANDLER(SpvOpULessThan, ULessThan);
     RUN_HANDLER(SpvOpULessThanEqual, ULessThanEqual);
     RUN_HANDLER(SpvOpUMod, UMod);
     RUN_HANDLER(SpvOpUndef, Undef);
     RUN_HANDLER(SpvOpUnreachable, Unreachable);
     RUN_HANDLER(SpvOpVariable, Variable);
     RUN_HANDLER(SpvOpVectorExtractDynamic, VectorExtractDynamic);
     RUN_HANDLER(SpvOpVectorInsertDynamic, VectorInsertDynamic);
     RUN_HANDLER(SpvOpVectorShuffle, VectorShuffle);
     RUN_HANDLER(SpvOpVectorTimesMatrix, VectorTimesMatrix);
     RUN_HANDLER(SpvOpVectorTimesScalar, VectorTimesScalar);

     NO_EFFECT(SpvOpNop);
     NO_EFFECT(SpvOpLine);
     NO_EFFECT(SpvOpLoopMerge);
     NO_EFFECT(SpvOpMemoryBarrier);
     NO_EFFECT(SpvOpNoLine);
     NO_EFFECT(SpvOpSelectionMerge);

 #undef RUN_HANDLER
 #undef NO_EFFECT

   default:
     // No handler exists for this opcode.
     Dev.reportError("Unimplemented instruction", true);
     break;
   }
 }


 void Invocation::executeAccessChain(const Instruction *Inst)

 {

   // Base pointer.

   uint32_t Id = Inst->getOperand(1);

   Object &Base = Objects[Inst->getOperand(2)];


   // Ensure base pointer is valid.

   if (!Base)

   {

     // Check for buffer variable matching base pointer ID.

     for (auto V : CurrentModule->getVariables())

     {

       if (V->getId() == Inst->getOperand(2))

       {

         // Report error for missing descriptor set entry.

         if (V->isBufferVariable())

         {

           std::stringstream Err;

           Err << "Invalid base pointer for descriptor set entry ("

               << V->getDecoration(SpvDecorationDescriptorSet) << ","

               << V->getDecoration(SpvDecorationBinding) << ")";

           Dev.reportError(Err.str());

         }

         else

         {

           Dev.reportError("Unresolved OpVariable pointer", true);

         }


         // Set result pointer to null.

         Objects[Id] = Object(Inst->getResultType(), (uint64_t)0);

         return;

       }

     }

     assert(false && "Invalid base pointer for AccessChain");

   }


   uint64_t Result = Base.get<uint64_t>();

   const Type *Ty = Base.getType()->getElementType();


   // Initialize matrix layout.

   PtrMatrixLayout MatrixLayout;

   if (Ty->isMatrix() || Ty->isVector())

     MatrixLayout = Base.getMatrixLayout();


   // Offset of the first index operand.

   uint32_t FirstIndexOperand = 3;


   // Perform initial dereference for element index for OpPtrAccessChain.

   if (Inst->getOpcode() == SpvOpPtrAccessChain)

   {

     // TODO: Need to handle this?

     assert(!Base.getDescriptorElements());


     FirstIndexOperand = 4;

     switch (Objects[Inst->getOperand(3)].getType()->getSize())

     {

     case 2:

       Result += Base.getType()->getElementOffset(OP(3, uint16_t));

       break;

     case 4:

       Result += Base.getType()->getElementOffset(OP(3, uint32_t));

       break;

     case 8:

       Result += Base.getType()->getElementOffset(OP(3, uint64_t));

       break;

     default:

       Dev.reportError("Unhandled index size", true);

       return;

     }

   }


   // Loop over indices.

   for (uint32_t i = FirstIndexOperand; i < Inst->getNumOperands(); i++)

   {

     uint64_t Index;

     const Object &IndexObj = Objects[Inst->getOperand(i)];

     switch (IndexObj.getType()->getSize())

     {

     case 2:

       Index = IndexObj.get<uint16_t>();

       break;

     case 4:

       Index = IndexObj.get<uint32_t>();

       break;

     case 8:

       Index = IndexObj.get<uint64_t>();

       break;

     default:

       Dev.reportError("Unhandled index size", true);

       return;

     }


     const Type *ElemTy = Ty->getElementType(Index);


     if (Base.getDescriptorElements() && i == FirstIndexOperand)

     {

       // Special case for arrays of descriptors.

       if (Index < Ty->getElementCount())

       {

         Result = Base.getDescriptorElements()[Index].Address;

       }

       else

       {

         Dev.reportError("Descriptor array element exceeds array size", false);

         Result = 0;

       }

     }

     else if (Ty->isMatrix() && MatrixLayout)

     {

       // Special case for matrix pointers with non-default layouts.

       if (MatrixLayout.Order == PtrMatrixLayout::COL_MAJOR)

         Result += Index * MatrixLayout.Stride;

       else

         Result += Index * ElemTy->getElementType()->getSize();

     }

     else if (Ty->isVector() && MatrixLayout)

     {

       // Special case for vector pointers with non-default layouts.

       if (MatrixLayout.Order == PtrMatrixLayout::COL_MAJOR)

         Result += Index * ElemTy->getSize();

       else

         Result += Index * MatrixLayout.Stride;

     }

     else

     {

       Result += Ty->getElementOffset(Index);

     }


     // Check for structure member decorations that affect memory layout.

     if (Ty->getTypeId() == Type::STRUCT)

     {

       auto &Decorations = Ty->getStructMemberDecorations((uint32_t)Index);

       if (Decorations.count(SpvDecorationMatrixStride))

       {

         // Track matrix layout.

         MatrixLayout.Stride = Decorations.at(SpvDecorationMatrixStride);

         if (Decorations.count(SpvDecorationColMajor))

         {

           MatrixLayout.Order = PtrMatrixLayout::COL_MAJOR;

         }

         else

         {

           assert(Decorations.count(SpvDecorationRowMajor));

           MatrixLayout.Order = PtrMatrixLayout::ROW_MAJOR;

         }

       }

     }


     Ty = ElemTy;

   }


   Objects[Id] = Object(Inst->getResultType(), Result);


   // Set matrix layout for result pointer if necessary.

   if (MatrixLayout && (Ty->isVector() || Ty->isMatrix()))

     Objects[Id].setMatrixLayout(MatrixLayout);

 }


 void Invocation::executeAll(const Instruction *Inst)

 {

   uint32_t Id = Inst->getOperand(1);

   Object Result(Inst->getResultType(), true);

   const Object &Vector = Objects[Inst->getOperand(2)];

   for (uint32_t i = 0; i < Vector.getType()->getElementCount(); i++)

   {

     if (!Vector.get<bool>(i))

     {

       Result.set(false);

       break;

     }

   }

   Objects[Id] = Result;

 }


 void Invocation::executeAny(const Instruction *Inst)

 {

   uint32_t Id = Inst->getOperand(1);

   Object Result(Inst->getResultType(), false);

   const Object &Vector = Objects[Inst->getOperand(2)];

   for (uint32_t i = 0; i < Vector.getType()->getElementCount(); i++)

   {

     if (Vector.get<bool>(i))

     {

       Result.set(true);

       break;

     }

   }

   Objects[Id] = Result;

 }


 template <typename T> void Invocation::executeAtomicOp(const Instruction *Inst)

 {

   assert(Inst->getOpcode() != SpvOpAtomicCompareExchange);


   // Get index of pointer operand.

   uint32_t PtrOp = (Inst->getOpcode() == SpvOpAtomicStore) ? 0 : 2;


   Object Pointer = Objects[Inst->getOperand(PtrOp)];

   uint32_t Scope = Objects[Inst->getOperand(PtrOp + 1)].get<uint32_t>();

   uint32_t Semantics = Objects[Inst->getOperand(PtrOp + 2)].get<uint32_t>();


   // Get value operand if present.

   T Value = 0;

   if (Inst->getNumOperands() > (PtrOp + 3))

     Value = Objects[Inst->getOperand(PtrOp + 3)].get<T>();


   // Perform atomic operation.

   Memory &Mem = getMemory(Pointer.getType()->getStorageClass());

   T Result = Mem.atomic<T>(Pointer.get<uint64_t>(), Inst->getOpcode(), Scope,

                            Semantics, Value);


   // Create result if necessary.

   if (PtrOp == 2)

     Objects[Inst->getOperand(1)] = Object(Inst->getResultType(), Result);

 }


 void Invocation::executeAtomicCompareExchange(const Instruction *Inst)

 {

   Object Pointer = Objects[Inst->getOperand(2)];

   uint32_t Scope = Objects[Inst->getOperand(3)].get<uint32_t>();

   uint32_t EqualSemantics = Objects[Inst->getOperand(4)].get<uint32_t>();

   uint32_t UnequalSemantics = Objects[Inst->getOperand(5)].get<uint32_t>();

   uint32_t Value = Objects[Inst->getOperand(6)].get<uint32_t>();

   uint32_t Comparator = Objects[Inst->getOperand(7)].get<uint32_t>();


   Memory &Mem = getMemory(Pointer.getType()->getStorageClass());

   uint32_t Result =

       Mem.atomicCmpXchg(Pointer.get<uint64_t>(), Scope, EqualSemantics,

                         UnequalSemantics, Value, Comparator);

   Objects[Inst->getOperand(1)] = Object(Inst->getResultType(), Result);

 }


 void Invocation::executeBitcast(const Instruction *Inst)

 {

   const Object &Source = Objects[Inst->getOperand(2)];

   Object Result = Object(Inst->getResultType(), Source.getData());

   Objects[Inst->getOperand(1)] = Result;

 }


 // OpBitwiseAnd: two-operand unsigned integer operation computing Lhs & Rhs.
 void Invocation::executeBitwiseAnd(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     return Lhs & Rhs;
   });
 }


 // OpBitwiseOr: two-operand unsigned integer operation computing Lhs | Rhs.
 void Invocation::executeBitwiseOr(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     return Lhs | Rhs;
   });
 }


 // OpBitwiseXor: two-operand unsigned integer operation computing Lhs ^ Rhs.
 void Invocation::executeBitwiseXor(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     return Lhs ^ Rhs;
   });
 }


 // OpBranch: move to the block named by operand 0.
 void Invocation::executeBranch(const Instruction *Inst)
 {
   const uint32_t TargetBlock = Inst->getOperand(0);
   moveToBlock(TargetBlock);
 }


 void Invocation::executeBranchConditional(const Instruction *Inst)

 {

   bool Condition = OP(0, bool);

   moveToBlock(Inst->getOperand(Condition ? 1 : 2));

 }


 void Invocation::executeCompositeConstruct(const Instruction *Inst)

 {

   uint32_t Id = Inst->getOperand(1);


   Object Result = Object(Inst->getResultType());


   // Set constituent values.

   for (uint32_t i = 2; i < Inst->getNumOperands(); i++)

   {

     uint32_t Id = Inst->getOperand(i);

     Result.insert({i - 2}, Objects[Id]);

   }


   Objects[Id] = Result;

 }


 // OpCompositeExtract: extract from the composite named by operand 2 the part
 // selected by the literal indices in operands 3 onwards.
 void Invocation::executeCompositeExtract(const Instruction *Inst)
 {
   const uint32_t ResultId = Inst->getOperand(1);

   // TODO: Handle indices of different sizes.
   const auto IndicesBegin = Inst->getOperands() + 3;
   const auto IndicesEnd = Inst->getOperands() + Inst->getNumOperands();
   std::vector<uint32_t> Path(IndicesBegin, IndicesEnd);

   Objects[ResultId] = Objects[Inst->getOperand(2)].extract(Path);
 }


 void Invocation::executeCompositeInsert(const Instruction *Inst)

 {

   uint32_t Id = Inst->getOperand(1);

   Object &Element = Objects[Inst->getOperand(2)];

   // TODO: Handle indices of different sizes.

   std::vector<uint32_t> Indices(Inst->getOperands() + 4,

                                 Inst->getOperands() + Inst->getNumOperands());

   assert(Objects[Inst->getOperand(3)].getType()->isComposite());

   Objects[Id] = Objects[Inst->getOperand(3)];

   Objects[Id].insert(Indices, Element);

 }


 void Invocation::executeControlBarrier(const Instruction *Inst)

 {

   // TODO: Handle other execution scopes

   assert(Objects[Inst->getOperand(0)].get<uint32_t>() == SpvScopeWorkgroup);

   AtBarrier = true;

 }


 // OpConvertFToS: convert a floating point operand to a signed integer whose
 // width is the bit width of the result's scalar type (16, 32 or 64).
 void Invocation::executeConvertFToS(const Instruction *Inst)
 {
   const auto ResultBits = Inst->getResultType()->getScalarType()->getBitWidth();
   switch (ResultBits)
   {
   case 16:
     executeOpFP<1>(
         Inst, [](auto Val) -> int16_t { return static_cast<int16_t>(Val); });
     break;
   case 32:
     executeOpFP<1>(
         Inst, [](auto Val) -> int32_t { return static_cast<int32_t>(Val); });
     break;
   case 64:
     executeOpFP<1>(
         Inst, [](auto Val) -> int64_t { return static_cast<int64_t>(Val); });
     break;
   default:
     assert(false && "Unhandled integer size for OpConvertFToS");
   }
 }


 // OpConvertFToU: convert a floating point operand to an unsigned integer
 // whose width is the bit width of the result's scalar type (16, 32 or 64).
 void Invocation::executeConvertFToU(const Instruction *Inst)
 {
   const auto ResultBits = Inst->getResultType()->getScalarType()->getBitWidth();
   switch (ResultBits)
   {
   case 16:
     executeOpFP<1>(
         Inst, [](auto Val) -> uint16_t { return static_cast<uint16_t>(Val); });
     break;
   case 32:
     executeOpFP<1>(
         Inst, [](auto Val) -> uint32_t { return static_cast<uint32_t>(Val); });
     break;
   case 64:
     executeOpFP<1>(
         Inst, [](auto Val) -> uint64_t { return static_cast<uint64_t>(Val); });
     break;
   default:
     assert(false && "Unhandled integer size for OpConvertFToU");
   }
 }


 // OpConvertSToF: convert a signed integer operand to floating point. The
 // bit width of the result's scalar type selects float (32) or double (64);
 // any other width trips an assertion.
 void Invocation::executeConvertSToF(const Instruction *Inst)
 {
   switch (Inst->getResultType()->getScalarType()->getBitWidth())
   {
   case 32:
     executeOpSInt<1>(Inst, [](auto A) -> float { return (float)A; });
     break;
   case 64:
     executeOpSInt<1>(Inst, [](auto A) -> double { return (double)A; });
     break;
   default:
     // The message names this instruction (it previously said OpConvertUToF).
     assert(false && "Unhandled floating point size for OpConvertSToF");
   }
 }


 // OpConvertUToF: convert an unsigned integer operand to floating point. The
 // bit width of the result's scalar type selects float (32) or double (64).
 void Invocation::executeConvertUToF(const Instruction *Inst)
 {
   const auto ResultBits = Inst->getResultType()->getScalarType()->getBitWidth();
   switch (ResultBits)
   {
   case 32:
     executeOpUInt<1>(
         Inst, [](auto Val) -> float { return static_cast<float>(Val); });
     break;
   case 64:
     executeOpUInt<1>(
         Inst, [](auto Val) -> double { return static_cast<double>(Val); });
     break;
   default:
     assert(false && "Unhandled floating point size for OpConvertUToF");
   }
 }


 void Invocation::executeCopyMemory(const Instruction *Inst)

 {

   const Object &Dst = Objects[Inst->getOperand(0)];

   const Object &Src = Objects[Inst->getOperand(1)];


   const Type *DstType = Dst.getType();

   const Type *SrcType = Src.getType();

   assert(DstType->getElementType() == SrcType->getElementType());


   Memory &DstMem = getMemory(DstType->getStorageClass());

   Memory &SrcMem = getMemory(SrcType->getStorageClass());


   uint64_t DstAddress = Dst.get<uint64_t>();

   uint64_t SrcAddress = Src.get<uint64_t>();

   uint64_t NumBytes = DstType->getElementType()->getSize();

   Memory::copy(DstAddress, DstMem, SrcAddress, SrcMem, NumBytes);

 }


 // OpCopyObject: the result (operand 1) becomes a copy of the object named by
 // operand 2.
 void Invocation::executeCopyObject(const Instruction *Inst)
 {
   const uint32_t ResultId = Inst->getOperand(1);
   const uint32_t SourceId = Inst->getOperand(2);
   Objects[ResultId] = Objects[SourceId];
 }


 void Invocation::executeDot(const Instruction *Inst)

 {

   Object &A = Objects[Inst->getOperand(2)];

   Object &B = Objects[Inst->getOperand(3)];

   switch (Inst->getResultType()->getBitWidth())

   {

   case 32:

   {

     float Result = 0.f;

     for (uint32_t i = 0; i < A.getType()->getElementCount(); i++)

       Result += A.get<float>(i) * B.get<float>(i);

     Objects[Inst->getOperand(1)] = Object(Inst->getResultType(), Result);

     break;

   }

   case 64:

   {

     double Result = 0.0;

     for (uint32_t i = 0; i < A.getType()->getElementCount(); i++)

       Result += A.get<double>(i) * B.get<double>(i);

     Objects[Inst->getOperand(1)] = Object(Inst->getResultType(), Result);

     break;

   }

   default:

     assert(false && "Unhandled floating point size for OpDot");

   }

 }


 // OpExtInst: execute an extended instruction. Operand 3 holds the extended
 // opcode and the instruction's arguments start at operand 4. Opcodes without
 // a handler are reported to the device as an error.
 void Invocation::executeExtInst(const Instruction *Inst)
 {
   // TODO: Currently assumes extended instruction set is GLSL.std.450
   const uint32_t ExtOpcode = Inst->getOperand(3);
   switch (ExtOpcode)
   {
   // Inverse and hyperbolic-inverse trigonometric functions.
   case GLSLstd450Acos:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return acos(V); });
     break;
   case GLSLstd450Acosh:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return acosh(V); });
     break;
   case GLSLstd450Asin:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return asin(V); });
     break;
   case GLSLstd450Asinh:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return asinh(V); });
     break;
   case GLSLstd450Atan:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return atan(V); });
     break;
   case GLSLstd450Atanh:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return atanh(V); });
     break;
   case GLSLstd450Atan2:
     executeOpFP<2, 4>(Inst, [](auto Num, auto Den) -> decltype(Den) {
       return atan2(Num, Den);
     });
     break;

   case GLSLstd450Cos:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return cos(V); });
     break;
   case GLSLstd450Cosh:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return cosh(V); });
     break;
   case GLSLstd450FAbs:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return fabs(V); });
     break;
   case GLSLstd450Fma:
     // Computed as a separate multiply and add.
     executeOpFP<3, 4>(Inst, [](auto Mul1, auto Mul2, auto Add) -> decltype(Mul1) {
       return Mul1 * Mul2 + Add;
     });
     break;
   case GLSLstd450Floor:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return floor(V); });
     break;
   case GLSLstd450InverseSqrt:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) {
       return 1.f / sqrt(V);
     });
     break;

   // Clamp, maximum and minimum.
   case GLSLstd450NClamp:
     executeOpFP<3, 4>(Inst, [](auto V, auto Lo, auto Hi) -> decltype(V) {
       return fmin(fmax(V, Lo), Hi);
     });
     break;
   case GLSLstd450FMax:
   case GLSLstd450NMax:
     executeOpFP<2, 4>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
       return fmax(Lhs, Rhs);
     });
     break;
   case GLSLstd450FMin:
   case GLSLstd450NMin:
     executeOpFP<2, 4>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
       return fmin(Lhs, Rhs);
     });
     break;

   case GLSLstd450Sin:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return sin(V); });
     break;
   case GLSLstd450Sinh:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return sinh(V); });
     break;
   case GLSLstd450Sqrt:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return sqrt(V); });
     break;
   case GLSLstd450Tan:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return tan(V); });
     break;
   case GLSLstd450Tanh:
     executeOpFP<1, 4>(Inst, [](auto V) -> decltype(V) { return tanh(V); });
     break;

   default:
     Dev.reportError("Unimplemented GLSL.std.450 extended instruction", true);
   }
 }


 // OpFAdd: two-operand floating point operation computing Lhs + Rhs.
 void Invocation::executeFAdd(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     return Lhs + Rhs;
   });
 }


 // OpFConvert: convert a floating point operand to float (32) or double (64),
 // selected by the bit width of the result type.
 void Invocation::executeFConvert(const Instruction *Inst)
 {
   const auto ResultBits = Inst->getResultType()->getBitWidth();
   switch (ResultBits)
   {
   case 32:
     executeOpFP<1>(
         Inst, [](auto Val) -> float { return static_cast<float>(Val); });
     break;
   case 64:
     executeOpFP<1>(
         Inst, [](auto Val) -> double { return static_cast<double>(Val); });
     break;
   default:
     assert(false && "Unhandled floating point size for OpFConvert");
   }
 }


 // OpFDiv: two-operand floating point operation computing Lhs / Rhs.
 void Invocation::executeFDiv(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     return Lhs / Rhs;
   });
 }


 // OpFMod: floating point modulo, computed as Lhs - Rhs * floor(Lhs / Rhs).
 void Invocation::executeFMod(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     const auto WholeQuotient = floor(Lhs / Rhs);
     return Lhs - (Rhs * WholeQuotient);
   });
 }


 // OpFMul: two-operand floating point operation computing Lhs * Rhs.
 void Invocation::executeFMul(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     return Lhs * Rhs;
   });
 }


 // OpFNegate: one-operand floating point operation computing -Val.
 void Invocation::executeFNegate(const Instruction *Inst)
 {
   executeOpFP<1>(Inst, [](auto Val) -> decltype(Val) {
     return -Val;
   });
 }


 // OpFOrdEqual: true when Lhs == Rhs and the operands are not unordered.
 void Invocation::executeFOrdEqual(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs == Rhs);
     return Holds && !std::isunordered(Lhs, Rhs);
   });
 }


 // OpFOrdGreaterThan: true when Lhs > Rhs and the operands are not unordered.
 void Invocation::executeFOrdGreaterThan(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs > Rhs);
     return Holds && !std::isunordered(Lhs, Rhs);
   });
 }


 // OpFOrdGreaterThanEqual: true when Lhs >= Rhs and the operands are not
 // unordered.
 void Invocation::executeFOrdGreaterThanEqual(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs >= Rhs);
     return Holds && !std::isunordered(Lhs, Rhs);
   });
 }


 // OpFOrdLessThan: true when Lhs < Rhs and the operands are not unordered.
 void Invocation::executeFOrdLessThan(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs < Rhs);
     return Holds && !std::isunordered(Lhs, Rhs);
   });
 }


 // OpFOrdLessThanEqual: true when Lhs <= Rhs and the operands are not
 // unordered.
 void Invocation::executeFOrdLessThanEqual(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs <= Rhs);
     return Holds && !std::isunordered(Lhs, Rhs);
   });
 }


 // OpFOrdNotEqual: true when Lhs != Rhs and the operands are not unordered.
 void Invocation::executeFOrdNotEqual(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs != Rhs);
     return Holds && !std::isunordered(Lhs, Rhs);
   });
 }


 // OpFRem: floating point remainder, computed with fmod(Lhs, Rhs).
 void Invocation::executeFRem(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     return fmod(Lhs, Rhs);
   });
 }


 // OpFSub: two-operand floating point operation computing Lhs - Rhs.
 void Invocation::executeFSub(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     return Lhs - Rhs;
   });
 }


 // OpFunctionCall: call the function named by operand 2.
 //
 // The argument objects (operands 3 onwards) are copied to the callee's
 // parameter IDs, the call site (instruction, function and block) is pushed
 // onto the call stack, and execution moves to the callee's first block.
 void Invocation::executeFunctionCall(const Instruction *Inst)
 {
   const Function *Func = CurrentModule->getFunction(Inst->getOperand(2));

   // Copy function parameters.
   assert(Inst->getNumOperands() == Func->getNumParams() + 3);
   // Use an unsigned index, like the other operand loops in this file, to
   // avoid comparing a signed value against the operand count.
   for (uint32_t i = 3; i < Inst->getNumOperands(); i++)
     Objects[Func->getParamId(i - 3)] = Objects[Inst->getOperand(i)];

   // Create call stack entry.
   StackEntry SE;
   SE.CallInst = Inst;
   SE.CallFunc = CurrentFunction;
   SE.CallBlock = CurrentBlock;
   CallStack.push_back(SE);

   // Move to first block of callee function.
   CurrentFunction = Func;
   moveToBlock(CurrentFunction->getFirstBlockId());
 }


 // OpFUnordEqual: true when Lhs == Rhs or the operands are unordered.
 void Invocation::executeFUnordEqual(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs == Rhs);
     return Holds || std::isunordered(Lhs, Rhs);
   });
 }


 // OpFUnordGreaterThan: true when Lhs > Rhs or the operands are unordered.
 void Invocation::executeFUnordGreaterThan(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs > Rhs);
     return Holds || std::isunordered(Lhs, Rhs);
   });
 }


 // OpFUnordGreaterThanEqual: true when Lhs >= Rhs or the operands are
 // unordered.
 void Invocation::executeFUnordGreaterThanEqual(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs >= Rhs);
     return Holds || std::isunordered(Lhs, Rhs);
   });
 }


 // OpFUnordLessThan: true when Lhs < Rhs or the operands are unordered.
 void Invocation::executeFUnordLessThan(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs < Rhs);
     return Holds || std::isunordered(Lhs, Rhs);
   });
 }


 // OpFUnordLessThanEqual: true when Lhs <= Rhs or the operands are unordered.
 void Invocation::executeFUnordLessThanEqual(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs <= Rhs);
     return Holds || std::isunordered(Lhs, Rhs);
   });
 }


 // OpFUnordNotEqual: true when Lhs != Rhs or the operands are unordered.
 void Invocation::executeFUnordNotEqual(const Instruction *Inst)
 {
   executeOpFP<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     const bool Holds = (Lhs != Rhs);
     return Holds || std::isunordered(Lhs, Rhs);
   });
 }


 // OpIAdd: two-operand unsigned integer operation computing Lhs + Rhs.
 void Invocation::executeIAdd(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) {
     return Lhs + Rhs;
   });
 }


 // OpIEqual: two-operand unsigned integer comparison computing Lhs == Rhs.
 void Invocation::executeIEqual(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst, [](auto Lhs, auto Rhs) -> bool {
     return Lhs == Rhs;
   });
 }


 void Invocation::executeImage(const Instruction *Inst)

 {

   // Extract image object from a sampled image.

   const Object &SampledImageObj = Objects[Inst->getOperand(2)];

   const SampledImage *SI = (const SampledImage *)(SampledImageObj.getData());

   Objects[Inst->getOperand(1)] =

       Object(SampledImageObj.getType()->getElementType(),

              (const uint8_t *)&(SI->Image));

 }


 // Execute OpImageQuerySize or OpImageQuerySizeLod.
 //
 // The result receives the image's width, then height and depth as applicable
 // to its dimensionality, queried at mip level 0 or, for
 // OpImageQuerySizeLod, at the level given by operand 3. For arrayed images
 // one more element follows with the number of array layers (divided by six
 // for cube images). Result elements are written as 32-bit values.
 void Invocation::executeImageQuerySize(const Instruction *Inst)
 {
   // Get image view object.
   const Object &ImageObj = Objects[Inst->getOperand(2)];
   const ImageView *Image = *(const ImageView **)(ImageObj.getData());

   Object Result(Inst->getResultType());
   assert(Result.getType()->getScalarType()->getBitWidth() == 32);

   // Get mip level (if explicit).
   uint32_t Level = 0;
   if (Inst->getOpcode() == SpvOpImageQuerySizeLod)
     Level = Objects[Inst->getOperand(3)].get<uint32_t>();

   // Get size in each dimension.
   // ArraySizeIndex is the result element that receives the layer count. It
   // starts at zero so that it holds a defined value even if execution
   // continues past the unhandled-dimensionality error below.
   uint32_t ArraySizeIndex = 0;
   switch (ImageObj.getType()->getDimensionality())
   {
   case SpvDim1D:
   case SpvDimBuffer:
     Result.set<uint32_t>(Image->getWidth(Level), 0);
     ArraySizeIndex = 1;
     break;
   case SpvDim2D:
   case SpvDimCube:
   case SpvDimRect:
     Result.set<uint32_t>(Image->getWidth(Level), 0);
     Result.set<uint32_t>(Image->getHeight(Level), 1);
     ArraySizeIndex = 2;
     break;
   case SpvDim3D:
     Result.set<uint32_t>(Image->getWidth(Level), 0);
     Result.set<uint32_t>(Image->getHeight(Level), 1);
     Result.set<uint32_t>(Image->getDepth(Level), 2);
     ArraySizeIndex = 3;
     break;
   default:
     Dev.reportError("Unhandled image dimensionality", true);
     break;
   }

   // Get number of array layers.
   if (ImageObj.getType()->isArrayedImage())
   {
     if (ImageObj.getType()->getDimensionality() == SpvDimCube)
       Result.set<uint32_t>(Image->getNumArrayLayers() / 6, ArraySizeIndex);
     else
       Result.set<uint32_t>(Image->getNumArrayLayers(), ArraySizeIndex);
   }

   Objects[Inst->getOperand(1)] = Result;
 }


 void Invocation::executeImageRead(const Instruction *Inst)

 {

   // Get image view object.

   const Object &ImageObj = Objects[Inst->getOperand(2)];

   const ImageView *Image = *(const ImageView **)(ImageObj.getData());


   // TODO: Handle subpass data dimensionality

   assert(ImageObj.getType()->getDimensionality() != SpvDimSubpassData);


   // Get coordinate operand.

   const Object &Coord = Objects[Inst->getOperand(3)];

   const Type *CoordType = Coord.getType();

   uint32_t NumCoords = CoordType->getElementCount();

   assert(NumCoords <= 3);


   // Last coordinate is array layer if required.

   uint32_t Layer = 0;

   if (ImageObj.getType()->isArrayedImage() ||

       ImageObj.getType()->getDimensionality() == SpvDimCube)

     Layer = Coord.get<uint32_t>(--NumCoords);


   // Extract coordinates.

   uint32_t X = Coord.get<uint32_t>(0);

   uint32_t Y = (NumCoords > 1) ? Coord.get<uint32_t>(1) : 0;

   uint32_t Z = (NumCoords > 2) ? Coord.get<uint32_t>(2) : 0;

   uint32_t Level = 0;


   // Handle optional image operands.

   if (Inst->getNumOperands() > 4)

   {

     uint32_t OpIdx = 5;

     uint32_t OperandMask = Inst->getOperand(4);


     if (OperandMask & SpvImageOperandsLodMask)

     {

       Level = Objects[Inst->getOperand(OpIdx++)].get<uint32_t>();

       OperandMask ^= SpvImageOperandsLodMask;

     }


     // Check for any remaining values after all supported operands handled.

     if (OperandMask)

       Dev.reportError("Unhandled image operand mask", true);

     assert(OpIdx == Inst->getNumOperands());

   }


   // Read texel from image.

   Image::Texel T;

   Image->read(T, X, Y, Z, Layer, Level);

   Objects[Inst->getOperand(1)] = T.toObject(Inst->getResultType());

 }


 void Invocation::executeImageSampleExplicitLod(const Instruction *Inst)

 {

   // Get sampler and image view objects.

   const Object &SampledImageObj = Objects[Inst->getOperand(2)];

   const SampledImage *SI = (const SampledImage *)(SampledImageObj.getData());

   const ImageView *Image = SI->Image;

   const Sampler *Sampler = SI->Sampler;

   const Type *ImageType = SampledImageObj.getType()->getElementType();


   // Get coordinate operand.

   const Object &Coord = Objects[Inst->getOperand(3)];

   const Type *CoordType = Coord.getType();

   uint32_t NumCoords = CoordType->getElementCount();

   assert(CoordType->getScalarType()->isFloat());

   assert(NumCoords <= 3);


   // Last coordinate is array layer if required.

   float Layer = 0;

   if (ImageType->isArrayedImage() ||

       ImageType->getDimensionality() == SpvDimCube)

     Layer = Coord.get<float>(--NumCoords);


   // Extract coordinates.

   float X = Coord.get<float>(0);

   float Y = (NumCoords > 1) ? Coord.get<float>(1) : 0;

   float Z = (NumCoords > 2) ? Coord.get<float>(2) : 0;


   // TODO: Handle additional operands

   // TODO: Handle Lod properly

   assert(Inst->getNumOperands() == 6);

   assert(Inst->getOperand(4) == SpvImageOperandsLodMask);

   assert(Objects[Inst->getOperand(5)].get<float>() == 0);


   // Sample texel from image.

   Image::Texel Texel;

   Sampler->sample(Image, Texel, X, Y, Z, Layer);

   Objects[Inst->getOperand(1)] = Texel.toObject(Inst->getResultType());

 }


 void Invocation::executeImageWrite(const Instruction *Inst)

 {

   // Get image view object.

   const Object &ImageObj = Objects[Inst->getOperand(0)];

   const ImageView *Image = *(const ImageView **)(ImageObj.getData());


   // TODO: Handle additional operands

   assert(Inst->getNumOperands() == 3);


   // Get coordinate operand.

   const Object &Coord = Objects[Inst->getOperand(1)];

   const Type *CoordType = Coord.getType();

   uint32_t NumCoords = CoordType->getElementCount();

   assert(NumCoords <= 3);


   // Last coordinate is array layer if required.

   uint32_t Layer = 0;

   if (ImageObj.getType()->isArrayedImage() ||

       ImageObj.getType()->getDimensionality() == SpvDimCube)

     Layer = Coord.get<uint32_t>(--NumCoords);


   // Extract coordinates.

   uint32_t X = Coord.get<uint32_t>(0);

   uint32_t Y = (NumCoords > 1) ? Coord.get<uint32_t>(1) : 0;

   uint32_t Z = (NumCoords > 2) ? Coord.get<uint32_t>(2) : 0;


   // Write texel to image.

   const Object &Texel = Objects[Inst->getOperand(2)];

   Image->write(Texel, X, Y, Z, Layer);

 }


 /// Multiply two integer operands component-wise, using unsigned arithmetic
 /// at the operand bit width.
 void Invocation::executeIMul(const Instruction *Inst)
 {
   executeOpUInt<2>(
       Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) { return Lhs * Rhs; });
 }


 /// Compare two integer operands for inequality, one component at a time,
 /// producing a boolean per component.
 void Invocation::executeINotEqual(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst,
                    [](auto Lhs, auto Rhs) -> bool { return Lhs != Rhs; });
 }


 /// Test each floating point component of the operand for infinity.
 void Invocation::executeIsInf(const Instruction *Inst)
 {
   executeOpFP<1>(Inst,
                  [](auto Value) -> bool { return std::isinf(Value); });
 }


 /// Test each floating point component of the operand for NaN.
 void Invocation::executeIsNan(const Instruction *Inst)
 {
   executeOpFP<1>(Inst,
                  [](auto Value) -> bool { return std::isnan(Value); });
 }


 /// Subtract the second integer operand from the first component-wise, using
 /// unsigned arithmetic at the operand bit width.
 void Invocation::executeISub(const Instruction *Inst)
 {
   executeOpUInt<2>(
       Inst, [](auto Lhs, auto Rhs) -> decltype(Lhs) { return Lhs - Rhs; });
 }


 /// Terminate this invocation and flag it as discarded.
 void Invocation::executeKill(const Instruction *Inst)
 {
   // With no current instruction, getState() reports FINISHED.
   CurrentInstruction = nullptr;
   Discarded = true;
 }


 void Invocation::executeLoad(const Instruction *Inst)

 {

   uint32_t Id = Inst->getOperand(1);

   const Object &Src = Objects[Inst->getOperand(2)];

   Memory &Mem = getMemory(Src.getType()->getStorageClass());

   Objects[Id] = Object::load(Inst->getResultType(), Mem, Src);

 }


 /// Component-wise logical AND of two boolean operands.
 void Invocation::executeLogicalAnd(const Instruction *Inst)
 {
   executeOp<bool, 2>(Inst,
                      [](bool Lhs, bool Rhs) { return Lhs && Rhs; });
 }


 /// Component-wise equality of two boolean operands.
 void Invocation::executeLogicalEqual(const Instruction *Inst)
 {
   executeOp<bool, 2>(Inst,
                      [](bool Lhs, bool Rhs) { return Lhs == Rhs; });
 }


 /// Component-wise logical negation of a boolean operand.
 void Invocation::executeLogicalNot(const Instruction *Inst)
 {
   executeOp<bool, 1>(Inst, [](bool Value) { return !Value; });
 }


 /// Component-wise inequality of two boolean operands.
 void Invocation::executeLogicalNotEqual(const Instruction *Inst)
 {
   executeOp<bool, 2>(Inst,
                      [](bool Lhs, bool Rhs) { return Lhs != Rhs; });
 }


 /// Component-wise logical OR of two boolean operands.
 void Invocation::executeLogicalOr(const Instruction *Inst)
 {
   executeOp<bool, 2>(Inst,
                      [](bool Lhs, bool Rhs) { return Lhs || Rhs; });
 }


 void Invocation::executeMatrixTimesScalar(const Instruction *Inst)

 {

   Object Matrix = Objects[Inst->getOperand(2)];

   Object Scalar = Objects[Inst->getOperand(3)];

   const Type *MatrixType = Matrix.getType();

   const Type *VectorType = MatrixType->getElementType();

   const Type *ScalarType = VectorType->getElementType();

   assert(ScalarType->isFloat());


   for (uint32_t col = 0; col < MatrixType->getElementCount(); col++)

   {

     for (uint32_t row = 0; row < VectorType->getElementCount(); row++)

     {

       Object Element = Matrix.extract({col, row});

       switch (ScalarType->getBitWidth())

       {

       case 32:

         Element.set(Element.get<float>() * Scalar.get<float>());

         break;

       case 64:

         Element.set(Element.get<double>() * Scalar.get<double>());

         break;

       default:

         Dev.reportError("Unhandled floating point size", true);

         break;

       }

       Matrix.insert({col, row}, Element);

     }

   }


   Objects[Inst->getOperand(1)] = Matrix;

 }


 void Invocation::executeMatrixTimesVector(const Instruction *Inst)

 {

   Object Matrix = Objects[Inst->getOperand(2)];

   Object Vector = Objects[Inst->getOperand(3)];

   const Type *MatrixType = Matrix.getType();

   const Type *ColumnType = MatrixType->getElementType();

   const Type *ScalarType = Inst->getResultType()->getElementType();

   assert(ScalarType->isFloat());


   Object Result(Inst->getResultType());

   for (uint32_t row = 0; row < ColumnType->getElementCount(); row++)

   {

     switch (ScalarType->getBitWidth())

     {

     case 32:

     {

       float R = 0.f;

       for (uint32_t col = 0; col < MatrixType->getElementCount(); col++)

         R += Vector.get<float>(col) * Matrix.extract({col, row}).get<float>();

       Result.set(R, row);

       break;

     }

     case 64:

     {

       double R = 0.f;

       for (uint32_t col = 0; col < MatrixType->getElementCount(); col++)

         R += Vector.get<double>(col) * Matrix.extract({col, row}).get<double>();

       Result.set(R, row);

       break;

     }

     default:

       Dev.reportError("Unhandled floating point size", true);

       break;

     }

   }

   Objects[Inst->getOperand(1)] = Result;

 }


 /// Bitwise complement of each integer component of the operand.
 void Invocation::executeNot(const Instruction *Inst)
 {
   executeOpUInt<1>(Inst,
                    [](auto Bits) -> decltype(Bits) { return ~Bits; });
 }


 /// Execute an OpPhi instruction.
 ///
 /// Operands from index 2 onwards are (value id, predecessor block id) pairs.
 /// The value paired with the block we arrived from is queued in PhiTemps
 /// rather than written to Objects directly; step() commits the queue when it
 /// reaches an instruction that is neither OpPhi nor OpLine.
 void Invocation::executePhi(const Instruction *Inst)
 {
   uint32_t Id = Inst->getOperand(1);

   assert(PreviousBlock);

   // Use an unsigned index to match the operand count, as executeSwitch does.
   for (uint32_t i = 2; i < Inst->getNumOperands(); i += 2)
   {
     // Each value id must be followed by its predecessor block id.
     assert(i + 1 < Inst->getNumOperands());
     if (Inst->getOperand(i + 1) == PreviousBlock)
     {
       PhiTemps.push_back({Id, Objects[Inst->getOperand(i)]});
       return;
     }
   }
   assert(false && "no matching predecessor block for OpPhi");
 }


 void Invocation::executeReturn(const Instruction *Inst)

 {

   // If this is the entry function, do nothing.

   if (CallStack.empty())

     return;


   StackEntry SE = CallStack.back();

   CallStack.pop_back();


   // Release function scope allocations.

   for (uint64_t Address : SE.Allocations)

     PrivateMemory->release(Address);


   // Return to calling function.

   CurrentFunction = SE.CallFunc;

   CurrentBlock = SE.CallBlock;

   CurrentInstruction = SE.CallInst->next();

 }


 void Invocation::executeReturnValue(const Instruction *Inst)

 {

   assert(!CallStack.empty());


   StackEntry SE = CallStack.back();

   CallStack.pop_back();


   // Set return value.

   Objects[SE.CallInst->getOperand(1)] = Objects[Inst->getOperand(0)];


   // Release function scope allocations.

   for (uint64_t Address : SE.Allocations)

     PrivateMemory->release(Address);


   // Return to calling function.

   CurrentFunction = SE.CallFunc;

   CurrentBlock = SE.CallBlock;

   CurrentInstruction = SE.CallInst->next();

 }


 void Invocation::executeSampledImage(const Instruction *Inst)

 {

   // Get image view object.

   const Object &ImageObj = Objects[Inst->getOperand(2)];

   const ImageView *Image = *(const ImageView **)(ImageObj.getData());


   // Get sampler object.

   const Object &SamplerObj = Objects[Inst->getOperand(3)];

   const Sampler *Sampler = *(const talvos::Sampler **)(SamplerObj.getData());


   // Create and populate SampledImage structure.

   Object Result(Inst->getResultType());

   ((SampledImage *)(Result.getData()))->Image = Image;

   ((SampledImage *)(Result.getData()))->Sampler = Sampler;

   Objects[Inst->getOperand(1)] = Result;

 }


 /// Convert signed integer components to the bit width of the result type
 /// (16, 32 or 64 bits).
 void Invocation::executeSConvert(const Instruction *Inst)
 {
   const auto Width = Inst->getResultType()->getBitWidth();
   if (Width == 16)
   {
     executeOpSInt<1>(Inst, [](auto Value) -> int16_t {
       return (int16_t)Value;
     });
   }
   else if (Width == 32)
   {
     executeOpSInt<1>(Inst, [](auto Value) -> int32_t {
       return (int32_t)Value;
     });
   }
   else if (Width == 64)
   {
     executeOpSInt<1>(Inst, [](auto Value) -> int64_t {
       return (int64_t)Value;
     });
   }
   else
   {
     assert(false && "Unhandled integer size for OpSConvert");
   }
 }


 /// Component-wise signed integer division of the first operand by the
 /// second.
 void Invocation::executeSDiv(const Instruction *Inst)
 {
   executeOpSInt<2>(
       Inst, [](auto Num, auto Den) -> decltype(Num) { return Num / Den; });
 }


 void Invocation::executeSelect(const Instruction *Inst)

 {

   uint32_t Id = Inst->getOperand(1);

   const Object &Condition = Objects[Inst->getOperand(2)];

   const Object &Object1 = Objects[Inst->getOperand(3)];

   const Object &Object2 = Objects[Inst->getOperand(4)];


   if (Condition.getType()->isScalar())

   {

     Objects[Id] = Condition.get<bool>() ? Object1 : Object2;

   }

   else

   {

     Object Result(Inst->getResultType());

     for (uint32_t i = 0; i < Result.getType()->getElementCount(); i++)

     {

       Result.insert({i}, Condition.get<bool>(i) ? Object1.extract({i})

                                                 : Object2.extract({i}));

     }

     Objects[Id] = Result;

   }

 }


 /// Component-wise signed integer comparison: first > second.
 void Invocation::executeSGreaterThan(const Instruction *Inst)
 {
   executeOpSInt<2>(Inst,
                    [](auto Lhs, auto Rhs) -> bool { return Lhs > Rhs; });
 }


 /// Component-wise signed integer comparison: first >= second.
 void Invocation::executeSGreaterThanEqual(const Instruction *Inst)
 {
   executeOpSInt<2>(Inst,
                    [](auto Lhs, auto Rhs) -> bool { return Lhs >= Rhs; });
 }


 /// Component-wise left shift of the first operand by the second, on
 /// unsigned integers.
 void Invocation::executeShiftLeftLogical(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst, [](auto Value, auto Shift) -> decltype(Value) {
     return Value << Shift;
   });
 }


 /// Component-wise right shift of the first operand by the second, on
 /// signed integers.
 void Invocation::executeShiftRightArithmetic(const Instruction *Inst)
 {
   executeOpSInt<2>(Inst, [](auto Value, auto Shift) -> decltype(Value) {
     return Value >> Shift;
   });
 }


 /// Component-wise right shift of the first operand by the second, on
 /// unsigned integers.
 void Invocation::executeShiftRightLogical(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst, [](auto Value, auto Shift) -> decltype(Value) {
     return Value >> Shift;
   });
 }


 /// Component-wise signed integer comparison: first < second.
 void Invocation::executeSLessThan(const Instruction *Inst)
 {
   executeOpSInt<2>(Inst,
                    [](auto Lhs, auto Rhs) -> bool { return Lhs < Rhs; });
 }


 /// Component-wise signed integer comparison: first <= second.
 void Invocation::executeSLessThanEqual(const Instruction *Inst)
 {
   executeOpSInt<2>(Inst,
                    [](auto Lhs, auto Rhs) -> bool { return Lhs <= Rhs; });
 }


 /// Component-wise signed modulo whose non-zero result takes the sign of the
 /// second operand (the divisor).
 ///
 /// C++ `%` truncates towards zero, so its result takes the sign of the
 /// dividend. When that remainder is non-zero and its sign differs from the
 /// divisor's, adding the divisor once moves it into the divisor's sign:
 /// -7 mod 3 == 2 and 7 mod -3 == -2.
 void Invocation::executeSMod(const Instruction *Inst)
 {
   executeOpSInt<2>(Inst, [](auto A, auto B) -> decltype(A) {
     // Narrow operand types are promoted by %, so let the type be deduced.
     auto Rem = A % B;
     if (Rem != 0 && ((Rem < 0) != (B < 0)))
       Rem += B;
     return static_cast<decltype(A)>(Rem);
   });
 }


 /// Component-wise arithmetic negation of a signed integer operand.
 void Invocation::executeSNegate(const Instruction *Inst)
 {
   executeOpSInt<1>(Inst,
                    [](auto Value) -> decltype(Value) { return -Value; });
 }


 /// Component-wise signed remainder, computed with the C++ `%` operator.
 void Invocation::executeSRem(const Instruction *Inst)
 {
   executeOpSInt<2>(
       Inst, [](auto Num, auto Den) -> decltype(Num) { return Num % Den; });
 }


 void Invocation::executeStore(const Instruction *Inst)

 {

   uint32_t Id = Inst->getOperand(1);

   const Object &Dest = Objects[Inst->getOperand(0)];

   Memory &Mem = getMemory(Dest.getType()->getStorageClass());

   Objects[Id].store(Mem, Dest);

 }


 /// Branch to the block whose case literal equals the selector, or to the
 /// default block (operand 1) when no case matches.
 ///
 /// Operands from index 2 onwards are (literal, target block id) pairs.
 void Invocation::executeSwitch(const Instruction *Inst)
 {
   const Object &Selector = Objects[Inst->getOperand(0)];

   // TODO: Handle other selector sizes
   if (Selector.getType()->getBitWidth() != 32)
     Dev.reportError("OpSwitch is only implemented for 32-bit selectors", true);

   // Walk the (literal, target) pairs looking for the first match.
   uint32_t Case = 2;
   while (Case < Inst->getNumOperands())
   {
     if (Selector.get<uint32_t>() == Inst->getOperand(Case))
     {
       moveToBlock(Inst->getOperand(Case + 1));
       return;
     }
     Case += 2;
   }

   // No case matched: take the default target.
   moveToBlock(Inst->getOperand(1));
 }


 /// Convert unsigned integer components to the bit width of the result type
 /// (16, 32 or 64 bits).
 void Invocation::executeUConvert(const Instruction *Inst)
 {
   const auto Width = Inst->getResultType()->getBitWidth();
   if (Width == 16)
   {
     executeOpUInt<1>(Inst, [](auto Value) -> uint16_t {
       return (uint16_t)Value;
     });
   }
   else if (Width == 32)
   {
     executeOpUInt<1>(Inst, [](auto Value) -> uint32_t {
       return (uint32_t)Value;
     });
   }
   else if (Width == 64)
   {
     executeOpUInt<1>(Inst, [](auto Value) -> uint64_t {
       return (uint64_t)Value;
     });
   }
   else
   {
     assert(false && "Unhandled integer size for OpUConvert");
   }
 }


 /// Component-wise unsigned integer division of the first operand by the
 /// second.
 void Invocation::executeUDiv(const Instruction *Inst)
 {
   executeOpUInt<2>(
       Inst, [](auto Num, auto Den) -> decltype(Num) { return Num / Den; });
 }


 /// Component-wise unsigned integer comparison: first > second.
 void Invocation::executeUGreaterThan(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst,
                    [](auto Lhs, auto Rhs) -> bool { return Lhs > Rhs; });
 }


 /// Component-wise unsigned integer comparison: first >= second.
 void Invocation::executeUGreaterThanEqual(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst,
                    [](auto Lhs, auto Rhs) -> bool { return Lhs >= Rhs; });
 }


 /// Component-wise unsigned integer comparison: first < second.
 void Invocation::executeULessThan(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst,
                    [](auto Lhs, auto Rhs) -> bool { return Lhs < Rhs; });
 }


 /// Component-wise unsigned integer comparison: first <= second.
 void Invocation::executeULessThanEqual(const Instruction *Inst)
 {
   executeOpUInt<2>(Inst,
                    [](auto Lhs, auto Rhs) -> bool { return Lhs <= Rhs; });
 }


 /// Store a freshly constructed object of the result type as the result;
 /// no value is assigned to it here.
 void Invocation::executeUndef(const Instruction *Inst)
 {
   const uint32_t ResultId = Inst->getOperand(1);
   Objects[ResultId] = Object(Inst->getResultType());
 }


 /// Report a fatal error: an OpUnreachable instruction should never run.
 void Invocation::executeUnreachable(const Instruction *Inst)
 {
   const bool Fatal = true;
   Dev.reportError("OpUnreachable instruction executed", Fatal);
 }


 /// Component-wise unsigned integer remainder of the first operand divided
 /// by the second.
 void Invocation::executeUMod(const Instruction *Inst)
 {
   executeOpUInt<2>(
       Inst, [](auto Num, auto Den) -> decltype(Num) { return Num % Den; });
 }


 /// Allocate a function-storage variable in private memory.
 ///
 /// The result is a pointer object holding the new address. If an
 /// initializer operand is present its value is stored at that address.
 /// Inside a called function the address is recorded in the current stack
 /// frame so that it can be released on return.
 void Invocation::executeVariable(const Instruction *Inst)
 {
   assert(Inst->getOperand(2) == SpvStorageClassFunction);

   // Reserve space for the pointee type and publish the pointer.
   const uint32_t ResultId = Inst->getOperand(1);
   const size_t NumBytes = Inst->getResultType()->getElementType()->getSize();
   const uint64_t Addr = PrivateMemory->allocate(NumBytes);
   Objects[ResultId] = Object(Inst->getResultType(), Addr);

   // Optional initializer is operand 3.
   const bool HasInitializer = Inst->getNumOperands() > 3;
   if (HasInitializer)
     Objects[Inst->getOperand(3)].store(*PrivateMemory, Addr);

   // Track function scope allocations.
   if (!CallStack.empty())
     CallStack.back().Allocations.push_back(Addr);
 }


 void Invocation::executeVectorExtractDynamic(const Instruction *Inst)

 {

   uint32_t Id = Inst->getOperand(1);

   uint16_t Index = 0;

   switch (Objects[Inst->getOperand(3)].getType()->getSize())

   {

   case 2:

     Index = OP(3, uint16_t);

     break;

   case 4:

     Index = (uint16_t)OP(3, uint32_t);

     break;

   case 8:

     Index = (uint16_t)OP(3, uint64_t);

     break;

   default:

     assert(false && "Unhandled index size in OpVectorExtractDynamic");

   }


   const Object &Vector = Objects[Inst->getOperand(2)];

   if (Index >= Vector.getType()->getElementCount())

     Dev.reportError("Vector index out of range");

   Objects[Id] = Vector.extract({Index});

 }


 /// Produce a copy of a vector with one component replaced, using a run-time
 /// index.
 ///
 /// The index operand may be 16, 32 or 64 bits wide. It is kept at full
 /// width until after the bounds check, so that a large index cannot wrap
 /// into the valid range. An out-of-range index is reported as a non-fatal
 /// error and the result is the unmodified copy of the vector instead of an
 /// out-of-bounds write.
 void Invocation::executeVectorInsertDynamic(const Instruction *Inst)
 {
   uint32_t Id = Inst->getOperand(1);

   // Read the index at its declared width.
   uint64_t Index = 0;
   switch (Objects[Inst->getOperand(4)].getType()->getSize())
   {
   case 2:
     Index = OP(4, uint16_t);
     break;
   case 4:
     Index = OP(4, uint32_t);
     break;
   case 8:
     Index = OP(4, uint64_t);
     break;
   default:
     assert(false && "Unhandled index size in OpVectorInsertDynamic");
   }

   const Object &Vector = Objects[Inst->getOperand(2)];
   const Object &Component = Objects[Inst->getOperand(3)];
   const bool InRange = Index < Vector.getType()->getElementCount();
   if (!InRange)
     Dev.reportError("Vector index out of range");

   // Start from a copy of the source vector.
   Objects[Id] = Vector;

   // reportError is non-fatal here, so skip the insert rather than write out
   // of bounds. When in range, the index is known to fit in 32 bits.
   if (InRange)
     Objects[Id].insert({static_cast<uint32_t>(Index)}, Component);
 }


 void Invocation::executeVectorShuffle(const Instruction *Inst)

 {

   uint32_t Id = Inst->getOperand(1);

   Object Result(Inst->getResultType());


   const Object &Vec1 = Objects[Inst->getOperand(2)];

   const Object &Vec2 = Objects[Inst->getOperand(3)];

   uint32_t Vec1Length = Vec1.getType()->getElementCount();


   for (uint32_t i = 0; i < Inst->getResultType()->getElementCount(); i++)

   {

     uint32_t Idx = Inst->getOperand(4 + i);

     if (Idx == 0xFFFFFFFF)

       ;

     else if (Idx < Vec1Length)

       Result.insert({i}, Vec1.extract({Idx}));

     else

       Result.insert({i}, Vec2.extract({Idx - Vec1Length}));

   }


   Objects[Id] = Result;

 }


 void Invocation::executeVectorTimesMatrix(const Instruction *Inst)

 {

   Object Vector = Objects[Inst->getOperand(2)];

   Object Matrix = Objects[Inst->getOperand(3)];

   const Type *MatrixType = Matrix.getType();

   const Type *VectorType = Vector.getType();

   const Type *ScalarType = Inst->getResultType()->getElementType();

   assert(ScalarType->isFloat());


   Object Result(Inst->getResultType());

   for (uint32_t col = 0; col < MatrixType->getElementCount(); col++)

   {

     switch (ScalarType->getBitWidth())

     {

     case 32:

     {

       float R = 0.f;

       for (uint32_t row = 0; row < VectorType->getElementCount(); row++)

         R += Vector.get<float>(row) * Matrix.extract({col, row}).get<float>();

       Result.set(R, col);

       break;

     }

     case 64:

     {

       double R = 0.0;

       for (uint32_t row = 0; row < VectorType->getElementCount(); row++)

         R += Vector.get<double>(row) * Matrix.extract({col, row}).get<double>();

       Result.set(R, col);

       break;

     }

     default:

       Dev.reportError("Unhandled floating point size", true);

       break;

     }

   }

   Objects[Inst->getOperand(1)] = Result;

 }


 /// Multiply each component of a floating point vector (operand 2) by a
 /// scalar (operand 3).
 ///
 /// 32-bit and 64-bit floats are handled; any other width trips an
 /// assertion.
 void Invocation::executeVectorTimesScalar(const Instruction *Inst)
 {
   switch (Inst->getResultType()->getScalarType()->getBitWidth())
   {
   case 32:
   {
     // The scalar is read once and captured by the per-component lambda.
     float Scalar = Objects[Inst->getOperand(3)].get<float>();
     executeOp<float, 1>(Inst, [&](float A) { return A * Scalar; });
     break;
   }
   case 64:
   {
     double Scalar = Objects[Inst->getOperand(3)].get<double>();
     executeOp<double, 1>(Inst, [&](double A) { return A * Scalar; });
     break;
   }
   default:
     // The message previously named OpDot, which is a different instruction.
     assert(false && "Unhandled floating point size for OpVectorTimesScalar");
   }
 }


 /// Return the memory instance that backs the given SPIR-V storage class.
 ///
 /// Function and Private map to this invocation's private memory, Input and
 /// Output to the pipeline memory, Workgroup to the workgroup's local memory,
 /// and the buffer/constant classes to the device's global memory. Any other
 /// class trips an assertion and aborts.
 Memory &Invocation::getMemory(uint32_t StorageClass)
 {
   switch (StorageClass)
   {
   // Per-invocation storage.
   case SpvStorageClassFunction:
   case SpvStorageClassPrivate:
     return *PrivateMemory;

   // Pipeline interface storage.
   case SpvStorageClassInput:
   case SpvStorageClassOutput:
     return *PipelineMemory;

   // Storage shared across a workgroup.
   case SpvStorageClassWorkgroup:
     assert(Group && "Not executing within a workgroup.");
     return Group->getLocalMemory();

   // Device-wide storage.
   case SpvStorageClassPushConstant:
   case SpvStorageClassStorageBuffer:
   case SpvStorageClassUniform:
   case SpvStorageClassUniformConstant:
     return Dev.getGlobalMemory();

   default:
     assert(false && "Unhandled storage class");
     abort();
   }
 }


 /// Return a copy of the object with the given id, or a default-constructed
 /// object if the id is outside the object table.
 Object Invocation::getObject(uint32_t Id) const
 {
   if (Id >= Objects.size())
     return Object();
   return Objects[Id];
 }


 /// Report the invocation's state: BARRIER while waiting at a barrier,
 /// READY while there is a current instruction, FINISHED otherwise.
 Invocation::State Invocation::getState() const
 {
   if (AtBarrier)
     return BARRIER;
   if (CurrentInstruction)
     return READY;
   return FINISHED;
 }


 void Invocation::moveToBlock(uint32_t Id)

 {

   const Block *B = CurrentFunction->getBlock(Id);

   CurrentInstruction = B->getLabel().next();

   PreviousBlock = CurrentBlock;

   CurrentBlock = Id;

 }


 void Invocation::step()

 {

   assert(getState() == READY);

   assert(CurrentInstruction);


   const Instruction *I = CurrentInstruction;


   if (!PhiTemps.empty() && I->getOpcode() != SpvOpPhi &&

       I->getOpcode() != SpvOpLine)

   {

     for (auto &P : PhiTemps)

       Objects[P.first] = std::move(P.second);

     PhiTemps.clear();

   }


   execute(I);


   // Move program counter to next instruction, unless a terminator instruction

   // was executed.

   if (I == CurrentInstruction)

     CurrentInstruction = CurrentInstruction->next();


   Dev.reportInstructionExecuted(this, I);


   if (getState() == FINISHED)

     Dev.reportInvocationComplete(this);

 }


 // Private helper functions for executing simple instructions.


 /// Invoke a unary callable on the single element of an operand array.
 template <typename OpTy, typename F>
 static auto apply(const std::array<OpTy, 1> Operands, const F &Op)
 {
   const OpTy &Only = std::get<0>(Operands);
   return Op(Only);
 }


 /// Invoke a binary callable on the two elements of an operand array, in
 /// order.
 template <typename OpTy, typename F>
 static auto apply(const std::array<OpTy, 2> Operands, const F &Op)
 {
   const OpTy &First = std::get<0>(Operands);
   const OpTy &Second = std::get<1>(Operands);
   return Op(First, Second);
 }


 /// Invoke a ternary callable on the three elements of an operand array, in
 /// order.
 template <typename OpTy, typename F>
 static auto apply(const std::array<OpTy, 3> Operands, const F &Op)
 {
   const OpTy &First = std::get<0>(Operands);
   const OpTy &Second = std::get<1>(Operands);
   const OpTy &Third = std::get<2>(Operands);
   return Op(First, Second, Third);
 }


 template <typename OpTy, unsigned N, unsigned Offset, typename F>

 void Invocation::executeOp(const Instruction *Inst, const F &Op)

 {

   uint32_t Id = Inst->getOperand(1);

   Object Result(Inst->getResultType());

   std::array<OpTy, N> Operands;


   // Loop over each vector component.

   for (uint32_t i = 0; i < Inst->getResultType()->getElementCount(); i++)

   {

     // Gather operands.

     for (unsigned j = 0; j < N; j++)

       Operands[j] = Objects[Inst->getOperand(Offset + j)].get<OpTy>(i);


     // Apply lambda and set result.

     Result.set(apply(Operands, Op), i);

   }


   Objects[Id] = Result;

 }


 template <unsigned N, unsigned Offset, typename F>

 void Invocation::executeOpSInt(const Instruction *Inst, const F &&Op)

 {

   const Type *OpType = Objects[Inst->getOperand(Offset)].getType();

   OpType = OpType->getScalarType();

   assert(OpType->isInt());

   switch (OpType->getBitWidth())

   {

   case 8:

     executeOp<int8_t, N, Offset>(Inst, Op);

     break;

   case 16:

     executeOp<int16_t, N, Offset>(Inst, Op);

     break;

   case 32:

     executeOp<int32_t, N, Offset>(Inst, Op);

     break;

   case 64:

     executeOp<int64_t, N, Offset>(Inst, Op);

     break;

   default:

     assert(false && "Unhandled binary operation integer width");

   }

 }


 template <unsigned N, unsigned Offset, typename F>

 void Invocation::executeOpFP(const Instruction *Inst, const F &&Op)

 {

   const Type *OpType = Objects[Inst->getOperand(Offset)].getType();

   OpType = OpType->getScalarType();

   assert(OpType->isFloat());

   switch (OpType->getBitWidth())

   {

   case 32:

     executeOp<float, N, Offset>(Inst, Op);

     break;

   case 64:

     executeOp<double, N, Offset>(Inst, Op);

     break;

   default:

     assert(false && "Unhandled binary operation floating point size");

   }

 }


 /// Run an N-ary operation on unsigned integer operands.
 ///
 /// The bit width of the operand at index Offset (8, 16, 32 or 64) selects
 /// the unsigned C++ type that executeOp instantiates Op with. Offset is
 /// forwarded to executeOp, as executeOpSInt and executeOpFP do, so that the
 /// operands are gathered from the same position the type was read from.
 template <unsigned N, unsigned Offset, typename F>
 void Invocation::executeOpUInt(const Instruction *Inst, const F &&Op)
 {
   const Type *OpType = Objects[Inst->getOperand(Offset)].getType();
   OpType = OpType->getScalarType();
   assert(OpType->isInt());
   switch (OpType->getBitWidth())
   {
   case 8:
     executeOp<uint8_t, N, Offset>(Inst, Op);
     break;
   case 16:
     executeOp<uint16_t, N, Offset>(Inst, Op);
     break;
   case 32:
     executeOp<uint32_t, N, Offset>(Inst, Op);
     break;
   case 64:
     executeOp<uint64_t, N, Offset>(Inst, Op);
     break;
   default:
     assert(false && "Unhandled binary operation integer width");
   }
 }


 } // namespace talvos

talvos::Variable
This class represents a module-scope variable declaration.
Definition: Variable.h:21

talvos::Block::getLabel
Instruction & getLabel() const
Returns the label instruction for this block.
Definition: Block.h:39

Block.h
This file declares the Block class.

talvos::Invocation::executeImageWrite
void executeImageWrite(const Instruction *Inst)
Definition: Invocation.cpp:1069

talvos::Object::getDescriptorElements
const DescriptorElement * getDescriptorElements() const
Returns the descriptor array element information.
Definition: Object.cpp:106

Workgroup.h
This file declares the Workgroup class.

talvos::Device::reportError
void reportError(const std::string &Error, bool Fatal=false)
Report an error that has occurred during emulation.
Definition: Device.cpp:146

talvos::Invocation::PhiTemps
std::vector< std::pair< uint32_t, Object > > PhiTemps
Temporary OpPhi results to be applied when we reach first non-OpPhi.
Definition: Invocation.h:235

talvos::Type::isScalar
bool isScalar() const
Returns true if this is a scalar type.
Definition: Type.cpp:77

talvos::Invocation::executeCompositeConstruct
void executeCompositeConstruct(const Instruction *Inst)
Definition: Invocation.cpp:503

talvos::Invocation::executeFRem
void executeFRem(const Instruction *Inst)
Definition: Invocation.cpp:832

talvos::Invocation::executePhi
void executePhi(const Instruction *Inst)
Definition: Invocation.cpp:1240

talvos::Invocation::executeIsInf
void executeIsInf(const Instruction *Inst)
Definition: Invocation.cpp:1110

Device.h
This file declares the Device class.

talvos::ImageView::write
void write(const Image::Texel &T, uint32_t X, uint32_t Y=0, uint32_t Z=0, uint32_t Layer=0, uint32_t MipLevel=0) const
Write a texel to the image view at the specified coordinate.
Definition: Image.cpp:511

talvos::Image
This class represents an image object.
Definition: Image.h:24

talvos::Invocation::PreviousBlock
uint32_t PreviousBlock
The previous block (for OpPhi).
Definition: Invocation.h:206

talvos::Type::isFloat
bool isFloat() const
Returns true if this is a floating point type.
Definition: Type.h:107

talvos::Invocation::CallStack
std::vector< StackEntry > CallStack
The function call stack.
Definition: Invocation.h:222

DISPATCH
#define DISPATCH(Op, Func)

talvos::Type::getSize
size_t getSize() const
Returns the size of this type in bytes.
Definition: Type.h:81

talvos::Invocation::StackEntry::Allocations
std::vector< uint64_t > Allocations
Function scope allocations within this stack frame.
Definition: Invocation.h:219

talvos::Invocation::executeExtInst
void executeExtInst(const Instruction *Inst)
Definition: Invocation.cpp:663

talvos::Invocation::executeFAdd
void executeFAdd(const Instruction *Inst)
Definition: Invocation.cpp:748

talvos::Memory::copy
static void copy(uint64_t DstAddress, Memory &DstMem, uint64_t SrcAddress, const Memory &SrcMem, uint64_t NumBytes)
Copy data between memory instances.
Definition: Memory.cpp:328

talvos::Invocation::StackEntry
A data structure holding information for a function call.
Definition: Invocation.h:211

talvos::Memory::atomicCmpXchg
uint32_t atomicCmpXchg(uint64_t Address, uint32_t Scope, uint32_t EqualSemantics, uint32_t UnequalSemantics, uint32_t Value, uint32_t Comparator)
Perform an atomic compare-exchange operation at Address.
Definition: Memory.cpp:156

talvos::Invocation::Discarded
bool Discarded
True when fragment was discarded.
Definition: Invocation.h:208

talvos::Memory::release
void release(uint64_t Address)
Release the allocation with base address Address.
Definition: Memory.cpp:292

talvos::SampledImage
A combination of an image and a sampler used to access it.
Definition: Image.h:268

talvos::Invocation::executeCopyObject
void executeCopyObject(const Instruction *Inst)
Definition: Invocation.cpp:631

talvos::PtrMatrixLayout
Structure to describe the memory layout of a matrix.
Definition: Object.h:24

talvos::EntryPoint::getFunction
const Function * getFunction() const
Returns the function specified by this entry point.
Definition: EntryPoint.h:38

talvos::Invocation::executeCompositeInsert
void executeCompositeInsert(const Instruction *Inst)
Definition: Invocation.cpp:528

talvos::Invocation::executeImageQuerySize
void executeImageQuerySize(const Instruction *Inst)
Definition: Invocation.cpp:926

talvos::Invocation::executeConvertUToF
void executeConvertUToF(const Instruction *Inst)
Definition: Invocation.cpp:598

Module.h
This file declares the Module class.

talvos::Type::getElementOffset
size_t getElementOffset(uint64_t Index) const
Returns the byte offset of the element at Index.
Definition: Type.cpp:26

talvos::Type::getElementType
const Type * getElementType(uint64_t Index=0) const
Returns the type of the element at Index.
Definition: Type.cpp:38

talvos::Type::getScalarType
const Type * getScalarType() const
Returns the element type for vector types, or this for scalar types.
Definition: Type.cpp:49

talvos::Invocation::executeINotEqual
void executeINotEqual(const Instruction *Inst)
Definition: Invocation.cpp:1105

talvos::Image::Texel::toObject
Object toObject(const Type *Ty) const
Create an object with type Ty from the texel data.
Definition: Image.cpp:135

talvos::Type::isInt
bool isInt() const
Returns true if this is an integer type.
Definition: Type.h:110

talvos::Type::STRUCT
Definition: Type.h:50

talvos::Invocation::PrivateMemory
Memory * PrivateMemory
The private memory instance.
Definition: Invocation.h:229

talvos::Invocation::Dev
Device & Dev
The device this invocation is executing on.
Definition: Invocation.h:226

talvos::Invocation::executeAtomicOp
void executeAtomicOp(const Instruction *Inst)
Definition: Invocation.cpp:428

talvos::Invocation::executeIsNan
void executeIsNan(const Instruction *Inst)
Definition: Invocation.cpp:1115

talvos::Sampler::sample
void sample(const ImageView *Image, Image::Texel &Texel, float S, float T=0, float R=0, float A=0, float Lod=0) const
Sample a texel from an image at the specified coordinates.
Definition: Image.cpp:517

talvos::PipelineStage::getModule
std::shared_ptr< const Module > getModule() const
Return the module this pipeline stage is using.
Definition: PipelineStage.h:57

talvos::Invocation::executeFUnordGreaterThan
void executeFUnordGreaterThan(const Instruction *Inst)
Definition: Invocation.cpp:871

talvos::Invocation::executeAll
void executeAll(const Instruction *Inst)
Definition: Invocation.cpp:396

talvos::Invocation::executeFOrdLessThanEqual
void executeFOrdLessThanEqual(const Instruction *Inst)
Definition: Invocation.cpp:818

talvos::MemoryScope::Invocation

talvos::ImageView
This class represents a view into a range of image subresources.
Definition: Image.h:178

EntryPoint.h
This file declares the EntryPoint class.

talvos::Function
This class represents a function in a SPIR-V Module.
Definition: Function.h:23

talvos::Invocation::executeKill
void executeKill(const Instruction *Inst)
Definition: Invocation.cpp:1125

talvos::Invocation::executeAccessChain
void executeAccessChain(const Instruction *Inst)
Definition: Invocation.cpp:238

talvos::Invocation::executeMatrixTimesScalar
void executeMatrixTimesScalar(const Instruction *Inst)
Definition: Invocation.cpp:1164

talvos::Invocation::executeFDiv
void executeFDiv(const Instruction *Inst)
Definition: Invocation.cpp:768

talvos::Invocation::~Invocation
~Invocation()
Destroy this invocation.
Definition: Invocation.cpp:89

talvos::Invocation::executeReturn
void executeReturn(const Instruction *Inst)
Definition: Invocation.cpp:1257

talvos::Invocation::executeFSub
void executeFSub(const Instruction *Inst)
Definition: Invocation.cpp:838

talvos::Invocation::StackEntry::CallInst
const Instruction * CallInst
The calling instruction.
Definition: Invocation.h:214

talvos::Object::store
void store(Memory &Mem, uint64_t Address) const
Store the value of this object to memory at Address.
Definition: Object.cpp:317

talvos::Invocation::executeFOrdNotEqual
void executeFOrdNotEqual(const Instruction *Inst)
Definition: Invocation.cpp:825

talvos::PipelineStage::getEntryPoint
const EntryPoint * getEntryPoint() const
Return the entry point this pipeline stage will invoke.
Definition: PipelineStage.h:51

talvos::Instruction::getOperand
uint32_t getOperand(unsigned i) const
Returns the operand at index i.
Definition: Instruction.h:52

talvos::Invocation::executeSelect
void executeSelect(const Instruction *Inst)
Definition: Invocation.cpp:1336

talvos::Invocation::Invocation
Invocation(Device &Dev, const std::vector< Object > &InitialObjects)
Create a standalone invocation for a device, with an initial set of result objects.
Definition: Invocation.cpp:38

talvos::Invocation::executeBitwiseXor
void executeBitwiseXor(const Instruction *Inst)
Definition: Invocation.cpp:487

talvos::Object::set
void set(T Value, uint32_t Element=0)
Set the value of this object to a scalar of type T.
Definition: Object.cpp:295

talvos::Type::getDimensionality
uint32_t getDimensionality() const
Returns the dimensionality of an image type.
Definition: Type.h:60

talvos::Invocation::CurrentModule
std::shared_ptr< const Module > CurrentModule
The current module.
Definition: Invocation.h:201

talvos::Instruction::getOpcode
uint16_t getOpcode() const
Returns the opcode.
Definition: Instruction.h:49

talvos::Invocation::executeIAdd
void executeIAdd(const Instruction *Inst)
Definition: Invocation.cpp:906

NOP
#define NOP(Op)

talvos::Image::Texel
This class represents a single texel with four 32-bit component values.
Definition: Image.h:28

talvos::Type::getElementCount
uint32_t getElementCount() const
Returns the number of elements in this array, struct, or vector type.
Definition: Type.h:64

talvos::Invocation::executeAtomicCompareExchange
void executeAtomicCompareExchange(const Instruction *Inst)
Definition: Invocation.cpp:454

talvos::Invocation::executeImageSampleExplicitLod
void executeImageSampleExplicitLod(const Instruction *Inst)
Definition: Invocation.cpp:1030

talvos::Invocation::executeNot
void executeNot(const Instruction *Inst)
Definition: Invocation.cpp:1235

talvos::Invocation::executeBitcast
void executeBitcast(const Instruction *Inst)
Definition: Invocation.cpp:470

Instruction.h
This file declares the Instruction class.

talvos::SampledImage::Image
const class ImageView * Image
Definition: Image.h:270

talvos::ImageView::getNumArrayLayers
uint32_t getNumArrayLayers() const
Returns the number of array layers in the image view.
Definition: Image.h:204

talvos::Object::load
static Object load(const Type *Ty, const Memory &Mem, uint64_t Address)
Create an object of type Ty from the data at Address.
Definition: Object.cpp:137

talvos::Invocation::executeFOrdLessThan
void executeFOrdLessThan(const Instruction *Inst)
Definition: Invocation.cpp:811

talvos::ImageView::getWidth
uint32_t getWidth(uint32_t Level=0) const
Get the width of the image view at the specified mip level.
Definition: Image.cpp:482

talvos::Invocation::executeFUnordNotEqual
void executeFUnordNotEqual(const Instruction *Inst)
Definition: Invocation.cpp:899

Type.h
This file declares the Type class.

talvos::Invocation::executeSampledImage
void executeSampledImage(const Instruction *Inst)
Definition: Invocation.cpp:1296

talvos::Invocation::StackEntry::CallFunc
const Function * CallFunc
The function containing CallInst.
Definition: Invocation.h:215

talvos::Dim3
Class representing a 3-dimensional size or ID.
Definition: Dim3.h:22

talvos::Function::getFirstBlockId
uint32_t getFirstBlockId() const
Returns the ID of the first block in this function.
Definition: Function.h:48

talvos::Invocation::State
State
Used to indicate whether an invocation is ready to execute, waiting at a barrier, or complete.
Definition: Invocation.h:38

talvos::Invocation::executeFMul
void executeFMul(const Instruction *Inst)
Definition: Invocation.cpp:780

talvos::Invocation::execute
void execute(const Instruction *Inst)
Execute Inst in this invocation.
Definition: Invocation.cpp:91

talvos::PtrMatrixLayout::Order
enum talvos::PtrMatrixLayout::@4 Order
Specifies the order of the elements in memory.

talvos::Invocation::executeConvertSToF
void executeConvertSToF(const Instruction *Inst)
Definition: Invocation.cpp:583

talvos::Invocation::executeIEqual
void executeIEqual(const Instruction *Inst)
Definition: Invocation.cpp:911

talvos::Invocation::executeFUnordLessThan
void executeFUnordLessThan(const Instruction *Inst)
Definition: Invocation.cpp:885

talvos::Invocation::executeCompositeExtract
void executeCompositeExtract(const Instruction *Inst)
Definition: Invocation.cpp:519

talvos::PtrMatrixLayout::Stride
uint32_t Stride
The stride in bytes between columns (COL_MAJOR) or rows (ROW_MAJOR).
Definition: Object.h:35

talvos::Memory::atomic
T atomic(uint64_t Address, uint32_t Opcode, uint32_t Scope, uint32_t Semantics, T Value=0)
Atomically apply operation defined by Opcode to Address.
Definition: Memory.cpp:81

talvos::PtrMatrixLayout::ROW_MAJOR
Definition: Object.h:29

talvos::Memory
This class represents an address space in the virtual device.
Definition: Memory.h:37

talvos::ImageView::getDepth
uint32_t getDepth(uint32_t Level=0) const
Get the depth of the image view at the specified mip level.
Definition: Image.cpp:465

talvos::Invocation::executeSDiv
void executeSDiv(const Instruction *Inst)
Definition: Invocation.cpp:1331

talvos::Invocation::executeISub
void executeISub(const Instruction *Inst)
Definition: Invocation.cpp:1120

PipelineStage.h
This file declares the PipelineStage class.

talvos::Workgroup::getVariables
const VariableList & getVariables() const
Return the workgroup scope variable pointer values.
Definition: Workgroup.h:61

talvos::Invocation::executeLogicalEqual
void executeLogicalEqual(const Instruction *Inst)
Definition: Invocation.cpp:1144

Image.h
This file declares data structures and functions for handling images.

talvos::Type::getBitWidth
uint32_t getBitWidth() const
Returns the bit-width of this type.
Definition: Type.cpp:20

talvos::Invocation::CurrentInstruction
const Instruction * CurrentInstruction
The current instruction.
Definition: Invocation.h:204

talvos::Type::getStorageClass
uint32_t getStorageClass() const
Returns the storage class of this type.
Definition: Type.cpp:60

talvos::Function::getNumParams
size_t getNumParams() const
Returns the number of parameters in this function.
Definition: Function.h:57

talvos::Device
A Device instance encapsulates properties and state for the virtual device.
Definition: Device.h:29

talvos::Invocation::executeLogicalNotEqual
void executeLogicalNotEqual(const Instruction *Inst)
Definition: Invocation.cpp:1154

talvos::Invocation::executeLogicalAnd
void executeLogicalAnd(const Instruction *Inst)
Definition: Invocation.cpp:1139

talvos::Invocation::executeAny
void executeAny(const Instruction *Inst)
Definition: Invocation.cpp:412

talvos::Invocation::StackEntry::CallBlock
uint32_t CallBlock
The block containing CallInst.
Definition: Invocation.h:216

talvos::Workgroup
This class represents a workgroup executing a compute command.
Definition: Workgroup.h:27

talvos::Invocation::executeMatrixTimesVector
void executeMatrixTimesVector(const Instruction *Inst)
Definition: Invocation.cpp:1197

talvos::Invocation::executeCopyMemory
void executeCopyMemory(const Instruction *Inst)
Definition: Invocation.cpp:613

talvos::Invocation::executeConvertFToU
void executeConvertFToU(const Instruction *Inst)
Definition: Invocation.cpp:565

talvos::Object::getData
uint8_t * getData()
Returns a mutable pointer to the raw data backing this object.
Definition: Object.h:88

talvos::Invocation::executeLogicalNot
void executeLogicalNot(const Instruction *Inst)
Definition: Invocation.cpp:1149

talvos::Invocation::executeFOrdGreaterThan
void executeFOrdGreaterThan(const Instruction *Inst)
Definition: Invocation.cpp:797

talvos::Sampler
This class represents a sampler object.
Definition: Image.h:252

talvos::Instruction::getNumOperands
uint16_t getNumOperands() const
Returns the number of operands this instruction has.
Definition: Instruction.h:46

talvos::Function::getParamId
uint32_t getParamId(uint32_t I) const
Returns the ID of the parameter at index I.
Definition: Function.h:54

talvos::Object::getType
const Type * getType() const
Returns the type of this object.
Definition: Object.h:101

talvos::PipelineStage
This class encapsulates information about a pipeline stage.
Definition: PipelineStage.h:30

talvos::Instruction::getResultType
const Type * getResultType() const
Returns the result type of this instruction, or nullptr if it does not produce a result.
Definition: Instruction.h:59

talvos::Device::reportInvocationBegin
void reportInvocationBegin(const Invocation *Invoc)
Definition: Device.cpp:259

talvos::ImageView::getHeight
uint32_t getHeight(uint32_t Level=0) const
Get the height of the image view at the specified mip level.
Definition: Image.cpp:470

talvos::Invocation::executeBitwiseAnd
void executeBitwiseAnd(const Instruction *Inst)
Definition: Invocation.cpp:477

Memory.h
This file declares the Memory class.

talvos::Invocation::AtBarrier
bool AtBarrier
True when at a barrier.
Definition: Invocation.h:207

talvos::Instruction::getOperands
const uint32_t * getOperands() const
Returns the operands.
Definition: Instruction.h:55

talvos::Invocation::executeFUnordLessThanEqual
void executeFUnordLessThanEqual(const Instruction *Inst)
Definition: Invocation.cpp:892

talvos::Invocation::executeBitwiseOr
void executeBitwiseOr(const Instruction *Inst)
Definition: Invocation.cpp:482

talvos::Invocation::executeDot
void executeDot(const Instruction *Inst)
Definition: Invocation.cpp:636

talvos::Object::getMatrixLayout
const PtrMatrixLayout & getMatrixLayout() const
Get the matrix layout for this object.
Definition: Object.cpp:111

talvos::Invocation::executeFNegate
void executeFNegate(const Instruction *Inst)
Definition: Invocation.cpp:785

talvos::Invocation::executeFUnordGreaterThanEqual
void executeFUnordGreaterThanEqual(const Instruction *Inst)
Definition: Invocation.cpp:878

talvos::Invocation::executeImageRead
void executeImageRead(const Instruction *Inst)
Definition: Invocation.cpp:979

talvos::Invocation::executeLoad
void executeLoad(const Instruction *Inst)
Definition: Invocation.cpp:1131

talvos::Type
This class represents a SPIR-V type.
Definition: Type.h:33

Function.h
This file declares the Function class.

talvos::Object::insert
void insert(const std::vector< uint32_t > &Indices, const Object &Element)
Insert the value of Element into a composite object.
Definition: Object.cpp:118

talvos::Invocation::executeFConvert
void executeFConvert(const Instruction *Inst)
Definition: Invocation.cpp:753

Invocation.h
This file declares the Invocation class.

talvos::Memory::allocate
uint64_t allocate(uint64_t NumBytes)
Allocate a new buffer of size NumBytes.
Definition: Memory.cpp:52

talvos::Invocation::executeConvertFToS
void executeConvertFToS(const Instruction *Inst)
Definition: Invocation.cpp:547

talvos::Invocation::Objects
std::vector< Object > Objects
Set of result objects.
Definition: Invocation.h:224

talvos::Type::isArrayedImage
bool isArrayedImage() const
Returns the Arrayed flag of an image type.
Definition: Type.h:98

talvos::Invocation::executeControlBarrier
void executeControlBarrier(const Instruction *Inst)
Definition: Invocation.cpp:540

OP
#define OP(Index, Type)
Get scalar operand at index Index with type Type.
Definition: Invocation.cpp:33

talvos::Invocation::executeImage
void executeImage(const Instruction *Inst)
Definition: Invocation.cpp:916

talvos::Object
This class represents an instruction result.
Definition: Object.h:51

talvos::Memory::store
void store(uint64_t Address, uint64_t NumBytes, const uint8_t *Data)
Store NumBytes of data from Data to Address.
Definition: Memory.cpp:306

talvos::Invocation::moveToBlock
void moveToBlock(uint32_t Id)
Move this invocation to the block with ID Id.
Definition: Invocation.cpp:1687

talvos::Invocation::executeReturnValue
void executeReturnValue(const Instruction *Inst)
Definition: Invocation.cpp:1276

talvos::Invocation::executeBranch
void executeBranch(const Instruction *Inst)
Definition: Invocation.cpp:492

talvos::Invocation::executeIMul
void executeIMul(const Instruction *Inst)
Definition: Invocation.cpp:1100

talvos::Instruction::next
const Instruction * next() const
Get the next instruction in the containing block.
Definition: Instruction.h:68

talvos::Invocation::CurrentBlock
uint32_t CurrentBlock
The current block.
Definition: Invocation.h:205

talvos::Invocation::executeBranchConditional
void executeBranchConditional(const Instruction *Inst)
Definition: Invocation.cpp:497

talvos::SampledImage::Sampler
const class Sampler * Sampler
Definition: Image.h:271

talvos::Invocation::executeFUnordEqual
void executeFUnordEqual(const Instruction *Inst)
Definition: Invocation.cpp:864

talvos::Instruction
This class represents a SPIR-V instruction.
Definition: Instruction.h:27

talvos::Invocation::executeSConvert
void executeSConvert(const Instruction *Inst)
Definition: Invocation.cpp:1313

talvos::Invocation::executeLogicalOr
void executeLogicalOr(const Instruction *Inst)
Definition: Invocation.cpp:1159

Variable.h
This file declares the Variable class.

talvos::Invocation::executeFOrdEqual
void executeFOrdEqual(const Instruction *Inst)
Definition: Invocation.cpp:790

talvos::Invocation::executeFunctionCall
void executeFunctionCall(const Instruction *Inst)
Definition: Invocation.cpp:843

talvos::Object::get
T get(uint32_t Element=0) const
Get the value of this object as a scalar of type T.
Definition: Object.cpp:97

talvos::PtrMatrixLayout::COL_MAJOR
Definition: Object.h:30

talvos::Object::extract
Object extract(const std::vector< uint32_t > &Indices) const
Extract an element from a composite object.
Definition: Object.cpp:75

talvos::Invocation::getMemory
Memory & getMemory(uint32_t StorageClass)
Returns the memory instance associated with StorageClass.
Definition: Invocation.cpp:1648

talvos::Invocation::CurrentFunction
const Function * CurrentFunction
The current function.
Definition: Invocation.h:203

talvos::Invocation::executeFOrdGreaterThanEqual
void executeFOrdGreaterThanEqual(const Instruction *Inst)
Definition: Invocation.cpp:804

talvos::Invocation::PipelineMemory
std::shared_ptr< Memory > PipelineMemory
Memory used for input and output storage classes.
Definition: Invocation.h:232

talvos::ImageView::read
void read(Image::Texel &T, uint32_t X, uint32_t Y=0, uint32_t Z=0, uint32_t Layer=0, uint32_t MipLevel=0) const
Read a texel from the image view at the specified coordinate.
Definition: Image.cpp:505

talvos::DescriptorElement::Address
uint64_t Address
Address of descriptor element.
Definition: Object.h:44

talvos::Invocation::executeFMod
void executeFMod(const Instruction *Inst)
Definition: Invocation.cpp:773

talvos::Block
A block of instructions ending with a termination instruction.
Definition: Block.h:21