Talvos  0.1
SPIR-V interpreter and dynamic analysis framework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
PipelineExecutor.cpp
Go to the documentation of this file.
1 // Copyright (c) 2018 the Talvos developers. All rights reserved.
2 //
3 // This file is distributed under a three-clause BSD license. For full license
4 // terms please see the LICENSE file distributed with this source code.
5 
8 
9 #include "config.h"
10 
11 #include <algorithm>
12 #include <cassert>
13 #include <cmath>
14 #include <iostream>
15 #include <iterator>
16 #include <limits>
17 #include <sstream>
18 #include <thread>
19 
20 #if defined(_WIN32) && !defined(__MINGW32__)
21 #define NOMINMAX
22 #include <io.h>
23 #include <windows.h>
24 #define isatty _isatty
25 #define STDIN_FILENO _fileno(stdin)
26 #undef ERROR
27 #undef VOID
28 #else
29 #include <unistd.h>
30 #endif
31 
32 #if HAVE_READLINE
33 #include <readline/history.h>
34 #include <readline/readline.h>
35 #endif
36 
37 #include <spirv/unified1/spirv.h>
38 
39 #include "PipelineExecutor.h"
40 #include "Utils.h"
41 #include "talvos/Commands.h"
42 #include "talvos/ComputePipeline.h"
43 #include "talvos/Device.h"
44 #include "talvos/EntryPoint.h"
46 #include "talvos/Image.h"
47 #include "talvos/Instruction.h"
48 #include "talvos/Invocation.h"
49 #include "talvos/Memory.h"
50 #include "talvos/Module.h"
51 #include "talvos/PipelineStage.h"
52 #include "talvos/RenderPass.h"
53 #include "talvos/Type.h"
54 #include "talvos/Variable.h"
55 #include "talvos/Workgroup.h"
56 
58 #define CONTEXT_SIZE 3
59 
60 namespace talvos
61 {
62 
64 static thread_local bool IsWorkerThread = false;
65 static thread_local Workgroup *CurrentGroup;
66 static thread_local Invocation *CurrentInvocation;
67 
69 std::map<uint32_t, uint32_t> PipelineExecutor::Breakpoints;
70 
73 {
75  std::vector<VertexOutput> VertexOutputs;
76 };
77 
80 {
81  std::map<SpvBuiltIn, Object> BuiltIns;
82 
84  std::map<std::pair<uint32_t, uint32_t>, Object> Locations;
85 };
86 
89 {
90  float X;
91  float Y;
92  float PointSize;
93 
94  const VertexOutput &Out;
95 };
96 
99 {
103 
107 };
108 
110  : Dev(Dev), CurrentCommand(nullptr), CurrentStage(nullptr)
111 {
112  ShutDownWorkers = false;
113 
114  Interactive = checkEnv("TALVOS_INTERACTIVE", false);
115 
116  // Get number of worker threads to launch.
117  NumThreads = 1;
118  if (!Interactive && Dev.isThreadSafe())
119  NumThreads = (uint32_t)getEnvUInt("TALVOS_NUM_WORKERS",
120  std::thread::hardware_concurrency());
121 }
122 
124 {
125  // Signal workers to exit.
126  WorkerMutex.lock();
127  ShutDownWorkers = true;
128  WorkerSignal.notify_all();
129  WorkerMutex.unlock();
130 
131  // Wait for workers to complete.
132  for (auto &WT : WorkerThreads)
133  WT.join();
134 }
135 
137 {
138  const DispatchCommand *DC = (const DispatchCommand *)CurrentCommand;
139 
140  // Create workgroup.
141  Workgroup *Group = new Workgroup(Dev, *this, GroupId);
142 
143  // Create invocations for this group.
144  Dim3 GroupSize = CurrentStage->getGroupSize();
145  for (uint32_t LZ = 0; LZ < GroupSize.Z; LZ++)
146  {
147  for (uint32_t LY = 0; LY < GroupSize.Y; LY++)
148  {
149  for (uint32_t LX = 0; LX < GroupSize.X; LX++)
150  {
151  Dim3 LocalId(LX, LY, LZ);
152  Dim3 GlobalId = LocalId + GroupId * GroupSize;
153  uint32_t LocalIndex = LX + (LY + (LZ * GroupSize.Y)) * GroupSize.X;
154  std::vector<Object> InitialObjects = Objects;
155 
156  // Create pipeline memory and populate with builtin variables.
157  std::shared_ptr<Memory> PipelineMemory =
158  std::make_shared<Memory>(Dev, MemoryScope::Invocation);
159  for (auto Var : CurrentStage->getEntryPoint()->getVariables())
160  {
161  const Type *Ty = Var->getType();
162  if (Ty->getStorageClass() != SpvStorageClassInput)
163  continue;
164 
165  size_t Sz = Ty->getElementType()->getSize();
166  uint64_t Address = PipelineMemory->allocate(Sz);
167  switch (Var->getDecoration(SpvDecorationBuiltIn))
168  {
169  case SpvBuiltInGlobalInvocationId:
170  PipelineMemory->store(Address, Sz, (uint8_t *)GlobalId.Data);
171  break;
172  case SpvBuiltInLocalInvocationId:
173  PipelineMemory->store(Address, Sz, (uint8_t *)LocalId.Data);
174  break;
175  case SpvBuiltInLocalInvocationIndex:
176  PipelineMemory->store(Address, Sz, (uint8_t *)&LocalIndex);
177  break;
178  case SpvBuiltInNumWorkgroups:
179  PipelineMemory->store(Address, Sz,
180  (uint8_t *)DC->getNumGroups().Data);
181  break;
182  case SpvBuiltInWorkgroupId:
183  PipelineMemory->store(Address, Sz, (uint8_t *)GroupId.Data);
184  break;
185  default:
186  std::cerr << "Unimplemented input variable builtin: "
187  << Var->getDecoration(SpvDecorationBuiltIn) << std::endl;
188  abort();
189  }
190 
191  // Set pointer value.
192  InitialObjects[Var->getId()] = Object(Ty, Address);
193  }
194 
195  // Create invocation and add to group.
196  Group->addWorkItem(
197  std::make_unique<Invocation>(Dev, *CurrentStage, InitialObjects,
198  PipelineMemory, Group, GlobalId));
199  }
200  }
201  }
202 
203  return Group;
204 }
205 
207 {
208  return CurrentInvocation;
209 }
210 
212 {
213  return CurrentGroup;
214 }
215 
216 bool PipelineExecutor::isWorkerThread() const { return IsWorkerThread; }
217 
219 {
220  assert(CurrentCommand == nullptr);
221  CurrentCommand = &Cmd;
222 
223  const PipelineContext &PC = Cmd.getPipelineContext();
224  const ComputePipeline *PL = PC.getComputePipeline();
225  assert(PL != nullptr);
226  CurrentStage = PL->getStage();
227 
228  // Allocate and initialize push constant data.
229  Memory &GlobalMem = Dev.getGlobalMemory();
230  uint64_t PushConstantAddress =
232  GlobalMem.store(PushConstantAddress, PipelineContext::PUSH_CONSTANT_MEM_SIZE,
233  PC.getPushConstantData());
234 
236  initializeVariables(PC.getComputeDescriptors(), PushConstantAddress);
237 
238  assert(PendingGroups.empty());
239  assert(RunningGroups.empty());
240 
241  Continue = false;
242  // TODO: Print info about current command (entry name, dispatch size, etc).
243 
244  // Build list of pending group IDs.
245  Dim3 BaseGroup = Cmd.getBaseGroup();
246  for (uint32_t GZ = 0; GZ < Cmd.getNumGroups().Z; GZ++)
247  for (uint32_t GY = 0; GY < Cmd.getNumGroups().Y; GY++)
248  for (uint32_t GX = 0; GX < Cmd.getNumGroups().X; GX++)
249  PendingGroups.push_back(
250  {BaseGroup.X + GX, BaseGroup.Y + GY, BaseGroup.Z + GZ});
251 
252  // Run worker threads to process groups.
253  NextWorkIndex = 0;
254  doWork([&]() { runComputeWorker(); });
255 
256  finalizeVariables(PC.getComputeDescriptors());
257  GlobalMem.release(PushConstantAddress);
258 
259  PendingGroups.clear();
260  CurrentCommand = nullptr;
261 }
262 
263 void PipelineExecutor::run(const talvos::DrawCommandBase &Cmd)
264 {
265  assert(CurrentCommand == nullptr);
266  CurrentCommand = &Cmd;
267 
268  Continue = false;
269 
270  const PipelineContext &PC = Cmd.getPipelineContext();
271  const GraphicsPipeline *PL = PC.getGraphicsPipeline();
272  assert(PL != nullptr);
273 
274  // Get selected viewport.
275  // TODO: Handle multiple viewports (and ViewportIndex)
276  assert(Cmd.getPipelineContext().getViewports().size() == 1);
277  VkViewport Viewport = Cmd.getPipelineContext().getViewports()[0];
278 
279  // Allocate and initialize push constant data.
280  Memory &GlobalMem = Dev.getGlobalMemory();
281  uint64_t PushConstantAddress =
282  GlobalMem.allocate(PipelineContext::PUSH_CONSTANT_MEM_SIZE);
283  GlobalMem.store(PushConstantAddress, PipelineContext::PUSH_CONSTANT_MEM_SIZE,
284  PC.getPushConstantData());
285 
286  // Set up vertex shader stage pipeline memories.
287  RenderPipelineState State;
288  State.VertexOutputs.resize(Cmd.getNumVertices());
289 
290  // Loop over instances.
291  for (uint32_t Instance = 0; Instance < Cmd.getNumInstances(); Instance++)
292  {
293  uint32_t InstanceIndex = Instance + Cmd.getInstanceOffset();
294 
295  // Prepare vertex stage objects.
296  CurrentStage = PL->getVertexStage();
297  Objects = CurrentStage->getObjects();
298  initializeVariables(PC.getGraphicsDescriptors(), PushConstantAddress);
299 
300  // Run worker threads to process vertices.
301  NextWorkIndex = 0;
302  doWork([&]() { runVertexWorker(&State, InstanceIndex); });
303 
304  finalizeVariables(PC.getGraphicsDescriptors());
305 
306  // Discard primitves before rasterization if requested.
307  if (PL->getRasterizationState().rasterizerDiscardEnable)
308  continue;
309 
310  // Switch to fragment shader for rasterization.
311  CurrentStage = PL->getFragmentStage();
312  assert(CurrentStage && "rendering without fragment shader not implemented");
313  Objects = CurrentStage->getObjects();
314  initializeVariables(PC.getGraphicsDescriptors(), PushConstantAddress);
315 
316  // TODO: Handle other topologies
317  VkPrimitiveTopology Topology = PL->getTopology();
318  switch (Topology)
319  {
320  case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
321  {
322  for (uint32_t v = 0; v < Cmd.getNumVertices(); v++)
323  rasterizePoint(Cmd, Viewport, State.VertexOutputs[v]);
324  break;
325  }
326  case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
327  {
328  for (uint32_t v = 0; v < Cmd.getNumVertices(); v += 3)
329  rasterizeTriangle(Cmd, Viewport, State.VertexOutputs[v],
330  State.VertexOutputs[v + 1],
331  State.VertexOutputs[v + 2]);
332  break;
333  }
334  case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
335  {
336  for (uint32_t v = 2; v < Cmd.getNumVertices(); v++)
337  {
338  const VertexOutput &A = State.VertexOutputs[v - 2];
339  const VertexOutput &B = State.VertexOutputs[v - 1];
340 
341  const VertexOutput &C = State.VertexOutputs[v];
342  rasterizeTriangle(Cmd, Viewport, A, B, C);
343 
344  if (++v >= Cmd.getNumVertices())
345  break;
346 
347  const VertexOutput &D = State.VertexOutputs[v];
348  rasterizeTriangle(Cmd, Viewport, B, D, C);
349  }
350  break;
351  }
352  case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
353  {
354  const VertexOutput &Center = State.VertexOutputs[0];
355  for (uint32_t v = 2; v < Cmd.getNumVertices(); v++)
356  {
357  const VertexOutput &A = State.VertexOutputs[v - 1];
358  const VertexOutput &B = State.VertexOutputs[v];
359  rasterizeTriangle(Cmd, Viewport, A, B, Center);
360  }
361  break;
362  }
363  default:
364  std::cerr << "Unimplemented primitive topology: " << Topology
365  << std::endl;
366  abort();
367  }
368 
369  finalizeVariables(PC.getGraphicsDescriptors());
370  }
371 
372  GlobalMem.release(PushConstantAddress);
373 
374  CurrentStage = nullptr;
375  CurrentCommand = nullptr;
376 }
377 
378 void PipelineExecutor::runComputeWorker()
379 {
380  IsWorkerThread = true;
381  CurrentInvocation = nullptr;
382 
383  // Loop until all groups are finished.
384  // A pool of running groups is maintained to allow the current group to be
385  // suspended and changed via the interactive debugger interface.
386  while (true)
387  {
388  // Get next group to run.
389  // Take from running pool first, then pending pool.
390  if (!RunningGroups.empty())
391  {
392  assert(NumThreads == 1);
393  CurrentGroup = RunningGroups.back();
394  RunningGroups.pop_back();
395  }
396  else if (NextWorkIndex < PendingGroups.size())
397  {
398  size_t GroupIndex = NextWorkIndex++;
399  if (GroupIndex >= PendingGroups.size())
400  break;
401  CurrentGroup = createWorkgroup(PendingGroups[GroupIndex]);
402  Dev.reportWorkgroupBegin(CurrentGroup);
403  }
404  else
405  {
406  // All groups are finished.
407  break;
408  }
409 
410  // Loop until all work items in the group have completed.
411  while (true)
412  {
413  // Step each invocation in group until it hits a barrier or completes.
414  // Note that the interact() calls can potentially change the current
415  // invocation and group being processed.
416  while (true)
417  {
418  // Get the next invocation in the current group in the READY state.
419  // TODO: Could move some of this logic into the Workgroup class?
420  const Workgroup::WorkItemList &WorkItems = CurrentGroup->getWorkItems();
421  auto I =
422  std::find_if(WorkItems.begin(), WorkItems.end(), [](const auto &I) {
423  return I->getState() == Invocation::READY;
424  });
425  if (I == WorkItems.end())
426  break;
427  CurrentInvocation = I->get();
428 
429  interact();
430  while (CurrentInvocation->getState() == Invocation::READY)
431  {
432  CurrentInvocation->step();
433  interact();
434  }
435  CurrentInvocation = nullptr;
436  }
437 
438  // Check for barriers.
439  // TODO: Move logic for barrier handling into Workgroup class?
440  const Workgroup::WorkItemList &WorkItems = CurrentGroup->getWorkItems();
441  size_t BarrierCount =
442  std::count_if(WorkItems.begin(), WorkItems.end(), [](const auto &I) {
443  return I->getState() == Invocation::BARRIER;
444  });
445  if (BarrierCount > 0)
446  {
447  // All invocations in the group must hit the barrier.
448  // TODO: Ensure they hit the *same* barrier?
449  // TODO: Allow for other execution scopes.
450  if (BarrierCount != WorkItems.size())
451  {
452  // TODO: Better error message.
453  // TODO: Try to carry on?
454  std::cerr << "Barrier not reached by every invocation." << std::endl;
455  abort();
456  }
457 
458  // Clear the barrier.
459  for (auto &WI : WorkItems)
460  WI->clearBarrier();
461  Dev.reportWorkgroupBarrier(CurrentGroup);
462  }
463  else
464  {
465  // All invocations must have completed - this group is done.
466  Dev.reportWorkgroupComplete(CurrentGroup);
467  delete CurrentGroup;
468  CurrentGroup = nullptr;
469  break;
470  }
471  }
472  }
473 }
474 
475 void PipelineExecutor::buildPendingFragments(const DrawCommandBase &Cmd,
476  int XMinFB, int XMaxFB, int YMinFB,
477  int YMaxFB)
478 {
479  const RenderPassInstance &RPI = Cmd.getRenderPassInstance();
480  const Framebuffer &FB = RPI.getFramebuffer();
481  const PipelineContext &PC = Cmd.getPipelineContext();
482 
483  // Clamp the bounding box to be within the framebuffer.
484  XMinFB = std::clamp(XMinFB, 0, (int)(FB.getWidth() - 1));
485  XMaxFB = std::clamp(XMaxFB, 0, (int)(FB.getWidth() - 1));
486  YMinFB = std::clamp(YMinFB, 0, (int)(FB.getHeight() - 1));
487  YMaxFB = std::clamp(YMaxFB, 0, (int)(FB.getHeight() - 1));
488 
489  // Clamp the bounding box to be within the scissor rectangle.
490  // TODO: Select correct scissor for current viewport
491  assert(PC.getScissors().size() == 1);
492  VkRect2D Scissor = PC.getScissors()[0];
493  XMinFB = std::max<int>(XMinFB, Scissor.offset.x);
494  XMaxFB = std::min<int>(XMaxFB, Scissor.offset.x + Scissor.extent.width - 1);
495  YMinFB = std::max<int>(YMinFB, Scissor.offset.y);
496  YMaxFB = std::min<int>(YMaxFB, Scissor.offset.y + Scissor.extent.height - 1);
497 
498  // Build list of framebuffer coordinates in the axis-aligned bounding box.
499  assert(PendingFragments.empty());
500  for (int YFB = YMinFB; YFB <= YMaxFB; YFB++)
501  for (int XFB = XMinFB; XFB <= XMaxFB; XFB++)
502  PendingFragments.push_back({(uint32_t)XFB, (uint32_t)YFB, 0});
503 }
504 
517 void interpolate(Object &Output, const Type *Ty, size_t Offset,
518  const Object &FA, const Object &FB, const Object &FC, float AW,
519  float BW, float CW, float InvW, float a, float b, float c,
520  bool Flat, bool Perspective)
521 {
522  if (Ty->isScalar())
523  {
524  if (Flat)
525  {
526  // Copy data from provoking vertex.
527  memcpy(Output.getData() + Offset, FA.getData() + Offset, Ty->getSize());
528  return;
529  }
530 
531  // Interpolation requires 32-bit floating point values.
532  assert(Ty->isFloat() && Ty->getBitWidth() == 32);
533 
534  // Interpolate scalar values between vertices.
535  float A = *(float *)(FA.getData() + Offset);
536  float B = *(float *)(FB.getData() + Offset);
537  float C = *(float *)(FC.getData() + Offset);
538  float F;
539  if (Perspective)
540  F = ((a * A / AW) + (b * B / BW) + (c * C / CW)) / InvW;
541  else
542  F = (a * A) + (b * B) + (c * C);
543 
544  *(float *)(Output.getData() + Offset) = F;
545  return;
546  }
547 
548  // Recurse through aggregate members.
549  for (uint32_t i = 0; i < Ty->getElementCount(); i++)
550  {
551  // Check for Flat and NoPerspective member decorations.
552  bool FlatElement = Flat;
553  bool PerspectiveElement = Perspective;
554  if (Ty->getTypeId() == Type::STRUCT)
555  {
556  if (Ty->getStructMemberDecorations(i).count(SpvDecorationFlat))
557  FlatElement = true;
558  if (Ty->getStructMemberDecorations(i).count(SpvDecorationNoPerspective))
559  PerspectiveElement = false;
560  }
561 
562  interpolate(Output, Ty->getElementType(i), Offset + Ty->getElementOffset(i),
563  FA, FB, FC, AW, BW, CW, InvW, a, b, c, FlatElement,
564  PerspectiveElement);
565  }
566 }
567 
568 float XDevToFB(float Xd, VkViewport Viewport)
569 {
570  return ((Viewport.width / 2.f) * Xd) + (Viewport.x + Viewport.width / 2.f);
571 }
572 
573 float XFBToDev(float Xfb, VkViewport Viewport)
574 {
575  return ((Xfb + 0.5f) - (Viewport.x + Viewport.width / 2.f)) /
576  (Viewport.width / 2.f);
577 }
578 
579 float YDevToFB(float Yd, VkViewport Viewport)
580 {
581  return ((Viewport.height / 2.f) * Yd) + (Viewport.y + Viewport.height / 2.f);
582 }
583 
584 float YFBToDev(float Yfb, VkViewport Viewport)
585 {
586  return ((Yfb + 0.5f) - (Viewport.y + Viewport.height / 2.f)) /
587  (Viewport.height / 2.f);
588 }
589 
590 // Blend a texel (NewTexel) against an existing color attachment (OldTexel).
591 void blendTexel(Image::Texel &NewTexel, const Image::Texel &OldTexel,
592  const VkPipelineColorBlendAttachmentState &Blend,
593  const std::array<float, 4> &BlendConstants)
594 {
595  Image::Texel Src = NewTexel;
596  Image::Texel Dst = OldTexel;
597 
598  // Lambda to get color component blend factor.
599  auto GetColorBlendFactor = [BlendConstants, Src, Dst](VkBlendFactor Factor,
600  float &R, float &G,
601  float &B) {
602  switch (Factor)
603  {
604  case VK_BLEND_FACTOR_ZERO:
605  R = G = B = 0.f;
606  break;
607  case VK_BLEND_FACTOR_ONE:
608  R = G = B = 1.f;
609  break;
610  case VK_BLEND_FACTOR_SRC_COLOR:
611  R = Src.get<float>(0);
612  G = Src.get<float>(1);
613  B = Src.get<float>(2);
614  break;
615  case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
616  R = 1.f - Src.get<float>(0);
617  G = 1.f - Src.get<float>(1);
618  B = 1.f - Src.get<float>(2);
619  break;
620  case VK_BLEND_FACTOR_DST_COLOR:
621  R = Dst.get<float>(0);
622  G = Dst.get<float>(1);
623  B = Dst.get<float>(2);
624  break;
625  case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
626  R = 1.f - Dst.get<float>(0);
627  G = 1.f - Dst.get<float>(1);
628  B = 1.f - Dst.get<float>(2);
629  break;
630  case VK_BLEND_FACTOR_SRC_ALPHA:
631  R = G = B = Src.get<float>(3);
632  break;
633  case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
634  R = G = B = 1.f - Src.get<float>(3);
635  break;
636  case VK_BLEND_FACTOR_DST_ALPHA:
637  R = G = B = Dst.get<float>(3);
638  break;
639  case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
640  R = G = B = 1.f - Dst.get<float>(3);
641  break;
642  case VK_BLEND_FACTOR_CONSTANT_COLOR:
643  R = BlendConstants[0];
644  G = BlendConstants[1];
645  B = BlendConstants[2];
646  break;
647  case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
648  R = 1.f - BlendConstants[0];
649  G = 1.f - BlendConstants[1];
650  B = 1.f - BlendConstants[2];
651  break;
652  case VK_BLEND_FACTOR_CONSTANT_ALPHA:
653  R = G = B = BlendConstants[3];
654  break;
655  case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
656  R = G = B = 1.f - BlendConstants[3];
657  break;
658  case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
659  R = G = B = std::min(Src.get<float>(3), 1.f - Dst.get<float>(3));
660  break;
661  default:
662  std::cerr << "Unhandled color blend factor: " << Factor << std::endl;
663  abort();
664  }
665  };
666 
667  // Apply blend operator to color components.
668  float Sr, Sg, Sb;
669  float Dr, Dg, Db;
670  GetColorBlendFactor(Blend.srcColorBlendFactor, Sr, Sg, Sb);
671  GetColorBlendFactor(Blend.dstColorBlendFactor, Dr, Dg, Db);
672  switch (Blend.colorBlendOp)
673  {
674  case VK_BLEND_OP_ADD:
675  NewTexel.set(0, Src.get<float>(0) * Sr + Dst.get<float>(0) * Dr);
676  NewTexel.set(1, Src.get<float>(1) * Sg + Dst.get<float>(1) * Dg);
677  NewTexel.set(2, Src.get<float>(2) * Sb + Dst.get<float>(2) * Db);
678  break;
679  case VK_BLEND_OP_SUBTRACT:
680  NewTexel.set(0, Src.get<float>(0) * Sr - Dst.get<float>(0) * Dr);
681  NewTexel.set(1, Src.get<float>(1) * Sg - Dst.get<float>(1) * Dg);
682  NewTexel.set(2, Src.get<float>(2) * Sb - Dst.get<float>(2) * Db);
683  break;
684  case VK_BLEND_OP_REVERSE_SUBTRACT:
685  NewTexel.set(0, Dst.get<float>(0) * Dr - Src.get<float>(0) * Sr);
686  NewTexel.set(1, Dst.get<float>(1) * Dg - Src.get<float>(1) * Sg);
687  NewTexel.set(2, Dst.get<float>(2) * Db - Src.get<float>(2) * Sb);
688  break;
689  case VK_BLEND_OP_MIN:
690  NewTexel.set(0, std::min(Src.get<float>(0), Dst.get<float>(0)));
691  NewTexel.set(1, std::min(Src.get<float>(1), Dst.get<float>(1)));
692  NewTexel.set(2, std::min(Src.get<float>(2), Dst.get<float>(2)));
693  break;
694  case VK_BLEND_OP_MAX:
695  NewTexel.set(0, std::max(Src.get<float>(0), Dst.get<float>(0)));
696  NewTexel.set(1, std::max(Src.get<float>(1), Dst.get<float>(1)));
697  NewTexel.set(2, std::max(Src.get<float>(2), Dst.get<float>(2)));
698  break;
699  default:
700  std::cerr << "Unhandled color blend operation: " << Blend.colorBlendOp
701  << std::endl;
702  abort();
703  }
704 
705  // Lambda to get alpha component blend factor.
706  auto GetAlphaBlendFactor = [BlendConstants, Src, Dst](VkBlendFactor Factor,
707  float &A) {
708  switch (Factor)
709  {
710  case VK_BLEND_FACTOR_ZERO:
711  A = 0.f;
712  break;
713  case VK_BLEND_FACTOR_ONE:
714  case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
715  A = 1.f;
716  break;
717  case VK_BLEND_FACTOR_SRC_COLOR:
718  case VK_BLEND_FACTOR_SRC_ALPHA:
719  A = Src.get<float>(3);
720  break;
721  case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
722  case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
723  A = 1.f - Src.get<float>(3);
724  break;
725  case VK_BLEND_FACTOR_DST_COLOR:
726  case VK_BLEND_FACTOR_DST_ALPHA:
727  A = Dst.get<float>(3);
728  break;
729  case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
730  case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
731  A = 1.f - Dst.get<float>(3);
732  break;
733  case VK_BLEND_FACTOR_CONSTANT_COLOR:
734  case VK_BLEND_FACTOR_CONSTANT_ALPHA:
735  A = BlendConstants[3];
736  break;
737  case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
738  case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
739  A = 1.f - BlendConstants[3];
740  break;
741  default:
742  std::cerr << "Unhandled alpha blend factor: " << Factor << std::endl;
743  abort();
744  }
745  };
746 
747  // Apply blend operator to alpha component.
748  float Sa;
749  float Da;
750  GetAlphaBlendFactor(Blend.srcAlphaBlendFactor, Sa);
751  GetAlphaBlendFactor(Blend.dstAlphaBlendFactor, Da);
752  switch (Blend.alphaBlendOp)
753  {
754  case VK_BLEND_OP_ADD:
755  NewTexel.set(3, Src.get<float>(3) * Sa + Dst.get<float>(3) * Da);
756  break;
757  case VK_BLEND_OP_SUBTRACT:
758  NewTexel.set(3, Src.get<float>(3) * Sa - Dst.get<float>(3) * Da);
759  break;
760  case VK_BLEND_OP_REVERSE_SUBTRACT:
761  NewTexel.set(3, Dst.get<float>(3) * Da - Src.get<float>(3) * Sa);
762  break;
763  case VK_BLEND_OP_MIN:
764  NewTexel.set(3, std::min(Src.get<float>(3), Dst.get<float>(3)));
765  break;
766  case VK_BLEND_OP_MAX:
767  NewTexel.set(3, std::max(Src.get<float>(3), Dst.get<float>(3)));
768  break;
769  default:
770  std::cerr << "Unhandled alpha blend operation: " << Blend.alphaBlendOp
771  << std::endl;
772  abort();
773  }
774 }
775 
// Run the current (fragment) stage for a single fragment and write the
// shader's outputs to the framebuffer's color attachments.
//
// Steps, in order:
//  1. Allocate per-invocation memory for every Input/Output variable of the
//     entry point and initialize the inputs.
//  2. Create an Invocation and step it until it leaves the READY state.
//  3. If the invocation was discarded, return without writing anything.
//  4. Gather the output variables into one texel per Location.
//  5. For each such texel: read the existing attachment texel, optionally
//     blend, apply the color write mask and write the result back.
//
// Frag       - the fragment being shaded (framebuffer X/Y, Depth, InvW).
// RPI        - render pass instance providing the framebuffer and render pass.
// GenLocData - callback invoked for each input variable that has a Location
//              decoration, with (Location, Component, variable, variable
//              type, pipeline memory, address); it is expected to fill in
//              the variable's storage at that address.
//
// NOTE(review): this listing is missing source lines 904 and 933, so the
// BlendAttachmentStates initializer and the final blendTexel() argument are
// not visible here - restore them from the upstream file.
776 void PipelineExecutor::processFragment(
777  const Fragment &Frag, const RenderPassInstance &RPI,
778  std::function<void(uint32_t, uint32_t, const Variable *, const Type *,
779  Memory *, uint64_t)>
780  GenLocData)
781 {
782  const PipelineContext &PC =
783  ((const DrawCommandBase *)CurrentCommand)->getPipelineContext();
784  const Framebuffer &FB = RPI.getFramebuffer();
785  const RenderPass &RP = RPI.getRenderPass();
786 
787  // Data about a fragment shader output variable.
788  struct FragmentOutput
789  {
790  uint64_t Address;
791  uint32_t Location;
792  uint32_t Component;
793  };
794 
795  // Create pipeline memory and populate with input/output variables.
// InitialObjects starts as a copy of the stage's Objects; the entries for
// the Input/Output variables are replaced with pointers into PipelineMemory.
796  std::vector<Object> InitialObjects = Objects;
797  std::shared_ptr<Memory> PipelineMemory =
798  std::make_shared<Memory>(Dev, MemoryScope::Invocation);
799  std::map<const Variable *, FragmentOutput> Outputs;
800  for (auto Var : CurrentStage->getEntryPoint()->getVariables())
801  {
802  const Type *PtrTy = Var->getType();
803  const Type *VarTy = PtrTy->getElementType();
804  if (PtrTy->getStorageClass() == SpvStorageClassInput)
805  {
806  // Allocate storage for input variable.
807  uint64_t Address = PipelineMemory->allocate(VarTy->getSize());
808  InitialObjects[Var->getId()] = Object(PtrTy, Address);
809 
810  // Initialize input variable data.
811  if (Var->hasDecoration(SpvDecorationLocation))
812  {
// Location-decorated inputs are filled in by the caller-supplied callback;
// Component defaults to 0 when the decoration is absent.
813  uint32_t Location = Var->getDecoration(SpvDecorationLocation);
814  uint32_t Component = 0;
815  if (Var->hasDecoration(SpvDecorationComponent))
816  Component = Var->getDecoration(SpvDecorationComponent);
817  GenLocData(Location, Component, Var, VarTy, &*PipelineMemory, Address);
818  }
819  else if (Var->hasDecoration(SpvDecorationBuiltIn))
820  {
821  switch (Var->getDecoration(SpvDecorationBuiltIn))
822  {
823  case SpvBuiltInFragCoord:
824  {
825  // TODO: Sample shading affects x/y components
// FragCoord is (X + 0.5, Y + 0.5, Depth, InvW), stored as 16 bytes.
826  assert(VarTy->isVector() && VarTy->getSize() == 16);
827  float FragCoord[4] = {Frag.X + 0.5f, Frag.Y + 0.5f, Frag.Depth,
828  Frag.InvW};
829  PipelineMemory->store(Address, 16, (const uint8_t *)FragCoord);
830  break;
831  }
832  default:
833  assert(false && "Unhandled fragment input builtin");
834  }
835  }
836  else
837  {
838  assert(false && "Unhandled input variable type");
839  }
840  }
841  else if (PtrTy->getStorageClass() == SpvStorageClassOutput)
842  {
843  // Allocate storage for output variable.
844  uint64_t Address = PipelineMemory->allocate(VarTy->getSize());
845  InitialObjects[Var->getId()] = Object(PtrTy, Address);
846 
847  // Store output variable information.
// Every output is required (by assertion) to carry a Location decoration.
848  assert(Var->hasDecoration(SpvDecorationLocation));
849  uint32_t Location = Var->getDecoration(SpvDecorationLocation);
850  uint32_t Component = 0;
851  if (Var->hasDecoration(SpvDecorationComponent))
852  Component = Var->getDecoration(SpvDecorationComponent);
853  Outputs[Var] = {Address, Location, Component};
854  }
855  }
856 
857  // Create fragment shader invocation.
// The invocation is created with no workgroup (nullptr) and a zero global
// ID; it is owned by this function and deleted below.
858  CurrentInvocation = new Invocation(Dev, *CurrentStage, InitialObjects,
859  PipelineMemory, nullptr, Dim3(0, 0, 0));
860 
861  // Run shader invocation to completion.
862  interact();
863  while (CurrentInvocation->getState() == Invocation::READY)
864  {
865  CurrentInvocation->step();
866  interact();
867  }
868 
// Capture the discard status before the invocation is destroyed.
869  bool Discarded = CurrentInvocation->wasDiscarded();
870 
871  delete CurrentInvocation;
872  CurrentInvocation = nullptr;
873 
// A discarded fragment produces no attachment writes.
874  if (Discarded)
875  return;
876 
877  // Gather fragment outputs for each location.
878  std::vector<uint32_t> ColorAttachments =
879  RP.getSubpass(RPI.getSubpassIndex()).ColorAttachments;
880  std::map<uint32_t, Image::Texel> OutTexels;
881  for (auto Output : Outputs)
882  {
883  uint32_t Location = Output.second.Location;
884  assert(Location < ColorAttachments.size());
885 
886  // Get output variable data.
887  const Object &OutputData =
888  Object::load(Output.first->getType()->getElementType(), *PipelineMemory,
889  Output.second.Address);
890 
891  // Set texel component(s) for this variable.
// Several variables may share a Location (at different Components), so an
// existing texel for this Location is extended rather than replaced.
// Components are copied as raw 32-bit values.
892  assert(OutputData.getType()->isScalar() ||
893  OutputData.getType()->isVector());
894  assert(OutputData.getType()->getScalarType()->getSize() == 4);
895  Image::Texel T;
896  if (OutTexels.count(Location))
897  T = OutTexels.at(Location);
898  for (uint32_t i = 0; i < OutputData.getType()->getElementCount(); i++)
899  T.set(Output.second.Component + i, OutputData.get<uint32_t>(i));
900  OutTexels[Location] = T;
901  }
902 
// NOTE(review): the initializer of BlendAttachmentStates (source line 904) is
// missing from this listing.
903  BlendAttachmentStateList BlendAttachmentStates =
905  assert(BlendAttachmentStates.size() == ColorAttachments.size());
906 
907  // Write fragment outputs to color attachments.
908  for (auto OT : OutTexels)
909  {
// OT.first is the output Location; it indexes both the subpass's color
// attachment references and the blend attachment states.
910  uint32_t Ref = ColorAttachments[OT.first];
911  assert(Ref < RP.getNumAttachments());
912  assert(Ref < FB.getAttachments().size());
913 
914  // Write texel color to attachment.
915  const ImageView *Attach = FB.getAttachments()[Ref];
916 
917  Image::Texel NewTexel = OT.second;
918  Image::Texel OldTexel;
919  Attach->read(OldTexel, Frag.X, Frag.Y);
920 
921  // Set default value for alpha channel if not present in format.
922  if (!hasAlphaChannel(Attach->getFormat()))
923  OldTexel.set(3, 1.f);
924 
925  // Blend texel values if enabled.
926  const VkPipelineColorBlendAttachmentState &Blend =
927  BlendAttachmentStates[OT.first];
928  if (Blend.blendEnable == VK_TRUE)
929  {
930  // TODO: Skip blending for integer formats.
931  // TODO: Get blend constants from PC to allow for dynamic constants.
// NOTE(review): the last argument of this call (source line 933) is missing
// from this listing.
932  blendTexel(NewTexel, OldTexel, Blend,
934  }
935 
936  // Only update components included in the color write mask.
// Masked-out components keep the attachment's existing raw 32-bit value.
937  if (!(Blend.colorWriteMask & VK_COLOR_COMPONENT_R_BIT))
938  NewTexel.set(0, OldTexel.get<uint32_t>(0));
939  if (!(Blend.colorWriteMask & VK_COLOR_COMPONENT_G_BIT))
940  NewTexel.set(1, OldTexel.get<uint32_t>(1));
941  if (!(Blend.colorWriteMask & VK_COLOR_COMPONENT_B_BIT))
942  NewTexel.set(2, OldTexel.get<uint32_t>(2));
943  if (!(Blend.colorWriteMask & VK_COLOR_COMPONENT_A_BIT))
944  NewTexel.set(3, OldTexel.get<uint32_t>(3));
945 
946  // Write texel to attachment.
947  Attach->write(NewTexel, Frag.X, Frag.Y);
948  }
949 }
950 
951 void PipelineExecutor::runWorker()
952 {
953  uint32_t NextTaskID = 1;
954  while (true)
955  {
956  // Wait to receive work (or shut down).
957  {
958  std::unique_lock<std::mutex> Lock(WorkerMutex);
959  while (true)
960  {
961  if (ShutDownWorkers)
962  return;
963  if (CurrentTaskID == NextTaskID)
964  break;
965  WorkerSignal.wait(Lock);
966  }
967  }
968 
969  // Do work.
970  assert(CurrentTask);
971  CurrentTask();
972 
973  // If we are last worker to finish, notify master that work is complete.
974  if (++NumWorkersFinished == NumThreads)
975  {
976  WorkerMutex.lock();
977  MasterSignal.notify_one();
978  WorkerMutex.unlock();
979  }
980 
981  NextTaskID++;
982  }
983 }
984 
985 void PipelineExecutor::doWork(std::function<void()> Task)
986 {
987  // Create worker threads if necessary.
988  if (WorkerThreads.empty())
989  {
990  for (unsigned i = 0; i < NumThreads; i++)
991  WorkerThreads.push_back(std::thread(&PipelineExecutor::runWorker, this));
992  }
993 
994  // Signal worker threads to perform task.
995  NumWorkersFinished = 0;
996  CurrentTask = Task;
997  CurrentTaskID++;
998  WorkerMutex.lock();
999  WorkerSignal.notify_all();
1000  WorkerMutex.unlock();
1001 
1002  // Wait for worker threads to finish task.
1003  {
1004  std::unique_lock<std::mutex> Lock(WorkerMutex);
1005  MasterSignal.wait(Lock, [&]() { return NumWorkersFinished == NumThreads; });
1006  }
1007 
1008  CurrentTask = std::function<void()>();
1009 }
1010 
1011 void PipelineExecutor::runPointFragmentWorker(PointPrimitive Primitive,
1012  const RenderPassInstance &RPI)
1013 {
1014  IsWorkerThread = true;
1015  CurrentInvocation = nullptr;
1016 
1017  // Loop until all framebuffer coordinates have been processed.
1018  while (true)
1019  {
1020  // Get next framebuffer coordinate index.
1021  uint32_t WorkIndex = (uint32_t)NextWorkIndex++;
1022  if (WorkIndex >= PendingFragments.size())
1023  break;
1024 
1025  Fragment Frag;
1026  Frag.X = PendingFragments[WorkIndex].X;
1027  Frag.Y = PendingFragments[WorkIndex].Y;
1028  Frag.Depth = 0; // TODO
1029  Frag.InvW = 0; // TODO
1030 
1031  // Compute point coordinate.
1032  float S = 0.5f + (Frag.X + 0.5f - Primitive.X) / Primitive.PointSize;
1033  float T = 0.5f + (Frag.Y + 0.5f - Primitive.Y) / Primitive.PointSize;
1034 
1035  // Check if pixel is inside point radius.
1036  if (S < 0 || T < 0 || S > 1 || T > 1)
1037  continue;
1038 
1039  // Lambda for generating data for location variables.
1040  auto GenLocData = [&](uint32_t Location, uint32_t Component,
1041  const Variable *Var, const Type *VarTy, Memory *Mem,
1042  uint64_t Address) {
1043  const Object &Out = Primitive.Out.Locations.at({Location, Component});
1044  Mem->store(Address, VarTy->getSize(), Out.getData());
1045  };
1046 
1047  processFragment(Frag, RPI, GenLocData);
1048  }
1049 }
1050 
1051 void PipelineExecutor::runTriangleFragmentWorker(TrianglePrimitive Primitive,
1052  const PipelineContext &PC,
1053  const RenderPassInstance &RPI,
1054  const VkViewport &Viewport)
1055 {
1056  IsWorkerThread = true;
1057  CurrentInvocation = nullptr;
1058 
1059  // Get rasterization state.
1060  const VkPipelineRasterizationStateCreateInfo &RasterizationState =
1062 
1063  // Get vertex positions.
1064  Vec4 A = Primitive.PosA;
1065  Vec4 B = Primitive.PosB;
1066  Vec4 C = Primitive.PosC;
1067 
1068  // Lambda to compute the area of a triangle (doubled).
1069  auto TriArea2 = [](const Vec4 &A, const Vec4 &B, const Vec4 &C) {
1070  return (C.X - A.X) * (B.Y - A.Y) - (B.X - A.X) * (C.Y - A.Y);
1071  };
1072 
1073  // Compute the area of the triangle (doubled).
1074  float Area2 = TriArea2(A, B, C);
1075 
1076  // Determine whether triangle is front-facing.
1077  bool FrontFacing;
1078  switch (RasterizationState.frontFace)
1079  {
1080  case VK_FRONT_FACE_COUNTER_CLOCKWISE:
1081  FrontFacing = Area2 > 0;
1082  break;
1083  case VK_FRONT_FACE_CLOCKWISE:
1084  FrontFacing = Area2 < 0;
1085  break;
1086  default:
1087  std::cerr << "Invalid front-facing sign value" << std::endl;
1088  abort();
1089  }
1090 
1091  // Cull triangle if necessary.
1092  if ((FrontFacing && RasterizationState.cullMode & VK_CULL_MODE_FRONT_BIT) ||
1093  (!FrontFacing && RasterizationState.cullMode & VK_CULL_MODE_BACK_BIT))
1094  return;
1095 
1096  // Loop until all framebuffer coordinates have been processed.
1097  while (true)
1098  {
1099  // Get next framebuffer coordinate index.
1100  uint32_t WorkIndex = (uint32_t)NextWorkIndex++;
1101  if (WorkIndex >= PendingFragments.size())
1102  break;
1103 
1104  Fragment Frag;
1105  Frag.X = PendingFragments[WorkIndex].X;
1106  Frag.Y = PendingFragments[WorkIndex].Y;
1107 
1108  // Compute barycentric coordinates using normalized device coordinates.
1109  Vec4 DevCoord = {XFBToDev(Frag.X, Viewport), YFBToDev(Frag.Y, Viewport)};
1110  float a = TriArea2(B, C, DevCoord) / Area2;
1111  float b = TriArea2(C, A, DevCoord) / Area2;
1112  float c = TriArea2(A, B, DevCoord) / Area2;
1113 
1114  // Snap back to the edge for samples that are only just over.
1115  // This is nasty hack to deal with cases where two primitives should share
1116  // an edge, but rounding errors cause the one that owns it to skip a sample.
1117  if (fabs(a) < 1.e-7f)
1118  a = 0.f;
1119  if (fabs(b) < 1.e-7f)
1120  b = 0.f;
1121  if (fabs(c) < 1.e-7f)
1122  c = 0.f;
1123 
1124  // Check if pixel is inside triangle.
1125  if (!(a >= 0 && b >= 0 && c >= 0))
1126  continue;
1127 
1128  // Calculate edge vectors.
1129  float BCX = C.X - B.X;
1130  float BCY = C.Y - B.Y;
1131  float CAX = A.X - C.X;
1132  float CAY = A.Y - C.Y;
1133  float ABX = B.X - A.X;
1134  float ABY = B.Y - A.Y;
1135  if (!FrontFacing)
1136  {
1137  BCX = -BCX;
1138  BCY = -BCY;
1139  CAX = -CAX;
1140  CAY = -CAY;
1141  ABX = -ABX;
1142  ABY = -ABY;
1143  }
1144 
1145  // Only fill top-left edges to avoid double-sampling on shared edges.
1146  if (a == 0)
1147  {
1148  if (!((BCY == 0 && BCX < 0) || BCY > 0))
1149  continue;
1150  }
1151  if (b == 0)
1152  {
1153  if (!((CAY == 0 && CAX < 0) || CAY > 0))
1154  continue;
1155  }
1156  if (c == 0)
1157  {
1158  if (!((ABY == 0 && ABX < 0) || ABY > 0))
1159  continue;
1160  }
1161 
1162  // Compute fragment depth and 1/w using linear interpolation.
1163  Frag.Depth = (a * A.Z) + (b * B.Z) + (c * C.Z);
1164  Frag.InvW = (a / A.W) + (b / B.W) + (c / C.W);
1165 
1166  // Lambda for generating data for location variables.
1167  auto GenLocData = [&](uint32_t Location, uint32_t Component,
1168  const Variable *Var, const Type *VarTy, Memory *Mem,
1169  uint64_t Address) {
1170  // Gather output data from each vertex.
1171  const Object &FA = Primitive.OutA.Locations.at({Location, Component});
1172  const Object &FB = Primitive.OutB.Locations.at({Location, Component});
1173  const Object &FC = Primitive.OutC.Locations.at({Location, Component});
1174 
1175  // Interpolate vertex outputs to produce fragment input.
1176  Object VarObj(VarTy);
1177  interpolate(VarObj, VarTy, 0, FA, FB, FC, A.W, B.W, C.W, Frag.InvW, a, b,
1178  c, Var->hasDecoration(SpvDecorationFlat),
1179  !Var->hasDecoration(SpvDecorationNoPerspective));
1180  VarObj.store(*Mem, Address);
1181  };
1182 
1183  processFragment(Frag, RPI, GenLocData);
1184  }
1185 }
1186 
1187 void PipelineExecutor::runVertexWorker(struct RenderPipelineState *State,
1188  uint32_t InstanceIndex)
1189 {
1190  IsWorkerThread = true;
1191  CurrentInvocation = nullptr;
1192 
1193  const DrawCommandBase *DC = (const DrawCommandBase *)CurrentCommand;
1194 
1195  // Loop until all vertices are finished.
1196  while (true)
1197  {
1198  // Get next vertex index.
1199  uint32_t WorkIndex = (uint32_t)NextWorkIndex++;
1200  if (WorkIndex >= DC->getNumVertices())
1201  break;
1202 
1203  // Generate vertex index from work index.
1204  uint32_t VertexIndex;
1205  switch (DC->getType())
1206  {
1207  case Command::DRAW:
1208  VertexIndex = WorkIndex + DC->getVertexOffset();
1209  break;
1210  case Command::DRAW_INDEXED:
1211  {
1212  // Load index from memory.
1213  const DrawIndexedCommand *DIC = (const DrawIndexedCommand *)DC;
1214  uint64_t BaseAddress = DIC->getIndexBaseAddress();
1215  switch (DIC->getIndexType())
1216  {
1217  case VK_INDEX_TYPE_UINT16:
1218  {
1219  uint16_t VertexIndex16;
1220  Dev.getGlobalMemory().load(
1221  (uint8_t *)&VertexIndex16,
1222  BaseAddress + (WorkIndex + DIC->getIndexOffset()) * 2, 2);
1223  VertexIndex = VertexIndex16;
1224  break;
1225  }
1226  case VK_INDEX_TYPE_UINT32:
1227  Dev.getGlobalMemory().load(
1228  (uint8_t *)&VertexIndex,
1229  BaseAddress + (WorkIndex + DIC->getIndexOffset()) * 4, 4);
1230  break;
1231  default:
1232  assert(false && "Unhandled vertex index type");
1233  VertexIndex = UINT32_MAX;
1234  break;
1235  }
1236  VertexIndex += DC->getVertexOffset();
1237  break;
1238  }
1239  default:
1240  assert(false && "Unhandled draw type");
1241  }
1242 
1243  std::vector<Object> InitialObjects = Objects;
1244 
1245  // Create pipeline memory and populate with input/output variables.
1246  std::shared_ptr<Memory> PipelineMemory =
1247  std::make_shared<Memory>(Dev, MemoryScope::Invocation);
1248  std::map<const Variable *, uint64_t> OutputAddresses;
1249  for (auto Var : CurrentStage->getEntryPoint()->getVariables())
1250  {
1251  const Type *Ty = Var->getType();
1252  if (Ty->getStorageClass() == SpvStorageClassInput)
1253  {
1254  // Allocate storage for input variable.
1255  const Type *ElemTy = Ty->getElementType();
1256  size_t ElemSize = ElemTy->getSize();
1257  uint64_t Address = PipelineMemory->allocate(ElemSize);
1258  InitialObjects[Var->getId()] = Object(Ty, Address);
1259 
1260  // Initialize input variable data.
1261  if (Var->hasDecoration(SpvDecorationLocation))
1262  {
1263  uint32_t Location = Var->getDecoration(SpvDecorationLocation);
1264  uint32_t Component = 0;
1265  if (Var->hasDecoration(SpvDecorationComponent))
1266  Component = Var->getDecoration(SpvDecorationComponent);
1267 
1268  if (ElemTy->isMatrix())
1269  {
1270  assert(Component == 0);
1271 
1272  const Type *ColTy = ElemTy->getElementType();
1273  size_t ColSize = ColTy->getSize();
1274 
1275  // Each matrix column occupies a distinct location.
1276  for (uint32_t Col = 0; Col < ElemTy->getElementCount(); Col++)
1277  {
1278  loadVertexInput(DC->getPipelineContext(), &*PipelineMemory,
1279  Address + Col * ColSize, VertexIndex,
1280  InstanceIndex, Location + Col, 0, ColTy);
1281  }
1282  }
1283  else
1284  {
1285  loadVertexInput(DC->getPipelineContext(), &*PipelineMemory, Address,
1286  VertexIndex, InstanceIndex, Location, Component,
1287  ElemTy);
1288  }
1289  }
1290  else if (Var->hasDecoration(SpvDecorationBuiltIn))
1291  {
1292  switch (Var->getDecoration(SpvDecorationBuiltIn))
1293  {
1294  case SpvBuiltInInstanceIndex:
1295  PipelineMemory->store(Address, 4, (const uint8_t *)&InstanceIndex);
1296  break;
1297  case SpvBuiltInVertexIndex:
1298  assert(ElemSize == 4);
1299  PipelineMemory->store(Address, 4, (const uint8_t *)&VertexIndex);
1300  break;
1301  default:
1302  assert(false && "Unhandled vertex input builtin");
1303  }
1304  }
1305  else
1306  {
1307  assert(false && "Unhandled input variable type");
1308  }
1309  }
1310  else if (Ty->getStorageClass() == SpvStorageClassOutput)
1311  {
1312  // Allocate storage for output variable and store address.
1313  uint64_t Address =
1314  PipelineMemory->allocate(Ty->getElementType()->getSize());
1315  InitialObjects[Var->getId()] = Object(Ty, Address);
1316  OutputAddresses[Var] = Address;
1317  }
1318  }
1319 
1320  // Create shader invocation.
1321  CurrentInvocation =
1322  new Invocation(Dev, *CurrentStage, InitialObjects, PipelineMemory,
1323  nullptr, Dim3(VertexIndex, 0, 0));
1324 
1325  // Run shader invocation to completion.
1326  interact();
1327  while (CurrentInvocation->getState() == Invocation::READY)
1328  {
1329  CurrentInvocation->step();
1330  interact();
1331  }
1332 
1333  delete CurrentInvocation;
1334  CurrentInvocation = nullptr;
1335 
1336  // Gather output variables.
1337  for (auto Var : CurrentStage->getEntryPoint()->getVariables())
1338  {
1339  if (Var->getType()->getStorageClass() != SpvStorageClassOutput)
1340  continue;
1341 
1342  uint64_t BaseAddress = OutputAddresses[Var];
1343  const Type *Ty = Var->getType()->getElementType();
1344 
1345  if (Var->hasDecoration(SpvDecorationBuiltIn))
1346  {
1347  SpvBuiltIn BuiltIn =
1348  (SpvBuiltIn)Var->getDecoration(SpvDecorationBuiltIn);
1349  State->VertexOutputs[WorkIndex].BuiltIns[BuiltIn] =
1350  Object::load(Ty, *PipelineMemory, BaseAddress);
1351  }
1352  else if (Var->hasDecoration(SpvDecorationLocation))
1353  {
1354  uint32_t Location = Var->getDecoration(SpvDecorationLocation);
1355  uint32_t Component = 0;
1356  if (Var->hasDecoration(SpvDecorationComponent))
1357  Component = Var->getDecoration(SpvDecorationComponent);
1358  State->VertexOutputs[WorkIndex].Locations[{Location, Component}] =
1359  Object::load(Ty, *PipelineMemory, BaseAddress);
1360  }
1361  else if (Ty->getTypeId() == Type::STRUCT &&
1362  Ty->getStructMemberDecorations(0).count(SpvDecorationBuiltIn))
1363  {
1364  // Load builtin from each structure member.
1365  for (uint32_t i = 0; i < Ty->getElementCount(); i++)
1366  {
1367  uint64_t Address = BaseAddress + Ty->getElementOffset(i);
1368  SpvBuiltIn BuiltIn = (SpvBuiltIn)Ty->getStructMemberDecorations(i).at(
1369  SpvDecorationBuiltIn);
1370  State->VertexOutputs[WorkIndex].BuiltIns[BuiltIn] =
1371  Object::load(Ty->getElementType(i), *PipelineMemory, Address);
1372  }
1373  }
1374  else
1375  {
1376  assert(false && "Unhandled output variable type");
1377  }
1378  }
1379  }
1380 }
1381 
1382 void PipelineExecutor::finalizeVariables(const DescriptorSetMap &DSM)
1383 {
1384  // Copy array variable data back to original buffers.
1385  for (auto V : CurrentStage->getModule()->getVariables())
1386  {
1387  // TODO: Skip constant variables too
1388  if (!V->isBufferVariable())
1389  continue;
1390  if (V->getType()->getElementType()->getTypeId() != Type::ARRAY)
1391  continue;
1392 
1393  assert(V->getType()->getElementType()->getTypeId() == Type::ARRAY);
1394  const Type *ArrayTy = V->getType()->getElementType();
1395 
1396  // Get descriptor set and binding.
1397  uint32_t Set = V->getDecoration(SpvDecorationDescriptorSet);
1398  uint32_t Binding = V->getDecoration(SpvDecorationBinding);
1399  assert(DSM.count(Set));
1400 
1401  uint64_t Address = Objects[V->getId()].get<uint64_t>();
1402  const DescriptorElement *DescriptorElements =
1403  Objects[V->getId()].getDescriptorElements();
1404  assert(DescriptorElements);
1405 
1406  // Copy array element values to original buffers.
1407  for (uint32_t i = 0; i < ArrayTy->getElementCount(); i++)
1408  {
1409  if (!DSM.at(Set).count({Binding, i}))
1410  continue;
1411 
1412  Memory::copy(DSM.at(Set).at({Binding, i}).Address, Dev.getGlobalMemory(),
1413  DescriptorElements[i].Address, Dev.getGlobalMemory(),
1414  DescriptorElements[i].NumBytes);
1415  }
1416 
1417  // Release allocation.
1418  Dev.getGlobalMemory().release(Address);
1419  delete[] DescriptorElements;
1420  }
1421 }
1422 
1423 void PipelineExecutor::initializeVariables(const talvos::DescriptorSetMap &DSM,
1424  uint64_t PushConstantAddress)
1425 {
1426  for (auto V : CurrentStage->getModule()->getVariables())
1427  {
1428  // Set push constant data address.
1429  if (V->getType()->getStorageClass() == SpvStorageClassPushConstant)
1430  {
1431  Objects[V->getId()] = Object(V->getType(), PushConstantAddress);
1432  continue;
1433  }
1434 
1435  if (!V->isBufferVariable())
1436  continue;
1437 
1438  // Look up variable in descriptor set and set pointer value if present.
1439  uint32_t Set = V->getDecoration(SpvDecorationDescriptorSet);
1440  uint32_t Binding = V->getDecoration(SpvDecorationBinding);
1441  if (!DSM.count(Set))
1442  continue;
1443 
1444  if (V->getType()->getElementType()->getTypeId() == Type::ARRAY)
1445  {
1446  const Type *ArrayType = V->getType()->getElementType();
1447 
1448  // Allocate array for descriptor element information.
1449  DescriptorElement *DescriptorElements =
1450  new DescriptorElement[ArrayType->getElementCount()];
1451 
1452  // Determine offset for each array element and total size.
1453  uint64_t NumBytes = 0;
1454  for (uint32_t i = 0; i < ArrayType->getElementCount(); i++)
1455  {
1456  if (!DSM.at(Set).count({Binding, i}))
1457  {
1458  DescriptorElements[i] = {0, 0};
1459  continue;
1460  }
1461 
1462  size_t ElemSize = DSM.at(Set).at({Binding, i}).NumBytes;
1463  DescriptorElements[i] = {NumBytes, ElemSize};
1464  NumBytes += (uint64_t)ElemSize;
1465  }
1466 
1467  // Create new allocation to store whole array.
1468  uint64_t Address = Dev.getGlobalMemory().allocate(NumBytes);
1469  Objects[V->getId()] = Object(V->getType(), Address);
1470  Objects[V->getId()].setDescriptorElements(DescriptorElements);
1471 
1472  // Copy array element values into new allocation.
1473  for (uint32_t i = 0; i < ArrayType->getElementCount(); i++)
1474  {
1475  if (!DSM.at(Set).count({Binding, i}))
1476  continue;
1477 
1478  // Set final address for start of array element.
1479  DescriptorElements[i].Address = Address + DescriptorElements[i].Address;
1480 
1481  // Perform copy.
1482  Memory::copy(DescriptorElements[i].Address, Dev.getGlobalMemory(),
1483  DSM.at(Set).at({Binding, i}).Address,
1484  Dev.getGlobalMemory(), DescriptorElements[i].NumBytes);
1485  }
1486  }
1487  else
1488  {
1489  if (!DSM.at(Set).count({Binding, 0}))
1490  continue;
1491 
1492  Objects[V->getId()] =
1493  Object(V->getType(), DSM.at(Set).at({Binding, 0}).Address);
1494  }
1495  }
1496 }
1497 
1498 void PipelineExecutor::rasterizePoint(const DrawCommandBase &Cmd,
1499  const VkViewport &Viewport,
1500  const VertexOutput &Vertex)
1501 {
1502  const RenderPassInstance &RPI = Cmd.getRenderPassInstance();
1503 
1504  // Get the point position.
1505  Vec4 Position = getPosition(Vertex);
1506 
1507  // Get the point size.
1508  float PointSize = 0.1f;
1509  if (Vertex.BuiltIns.count(SpvBuiltInPointSize))
1510  PointSize = Vertex.BuiltIns.at(SpvBuiltInPointSize).get<float>();
1511 
1512  // Get framebuffer coordinate of primitive.
1513  float X = XDevToFB(Position.X, Viewport);
1514  float Y = YDevToFB(Position.Y, Viewport);
1515 
1516  // Compute a bounding box for the point primitive.
1517  int XMinFB = (int)std::floor(X - (PointSize / 2));
1518  int XMaxFB = (int)std::ceil(X + (PointSize / 2));
1519  int YMinFB = (int)std::floor(Y - (PointSize / 2));
1520  int YMaxFB = (int)std::ceil(Y + (PointSize / 2));
1521 
1522  buildPendingFragments(Cmd, XMinFB, XMaxFB, YMinFB, YMaxFB);
1523 
1524  // Run worker threads to process fragments.
1525  NextWorkIndex = 0;
1526  PointPrimitive Primitive = {X, Y, PointSize, Vertex};
1527  doWork([&]() { runPointFragmentWorker(Primitive, RPI); });
1528 
1529  PendingFragments.clear();
1530 }
1531 
1532 void PipelineExecutor::rasterizeTriangle(const DrawCommandBase &Cmd,
1533  const VkViewport &Viewport,
1534  const VertexOutput &VA,
1535  const VertexOutput &VB,
1536  const VertexOutput &VC)
1537 {
1538  const RenderPassInstance &RPI = Cmd.getRenderPassInstance();
1539 
1540  // Gather vertex positions for the primitive.
1541  Vec4 A = getPosition(VA);
1542  Vec4 B = getPosition(VB);
1543  Vec4 C = getPosition(VC);
1544 
1545  // Convert clip coordinates to normalized device coordinates.
1546  A.X /= A.W;
1547  A.Y /= A.W;
1548  A.Z /= A.W;
1549  B.X /= B.W;
1550  B.Y /= B.W;
1551  B.Z /= B.W;
1552  C.X /= C.W;
1553  C.Y /= C.W;
1554  C.Z /= C.W;
1555 
1556  // Compute an axis-aligned bounding box for the primitive.
1557  float XMinDev = std::fmin(A.X, std::fmin(B.X, C.X));
1558  float YMinDev = std::fmin(A.Y, std::fmin(B.Y, C.Y));
1559  float XMaxDev = std::fmax(A.X, std::fmax(B.X, C.X));
1560  float YMaxDev = std::fmax(A.Y, std::fmax(B.Y, C.Y));
1561  int XMinFB = (int)std::floor(XDevToFB(XMinDev, Viewport));
1562  int XMaxFB = (int)std::ceil(XDevToFB(XMaxDev, Viewport));
1563  int YMinFB = (int)std::floor(YDevToFB(YMinDev, Viewport));
1564  int YMaxFB = (int)std::ceil(YDevToFB(YMaxDev, Viewport));
1565 
1566  buildPendingFragments(Cmd, XMinFB, XMaxFB, YMinFB, YMaxFB);
1567 
1568  // Run worker threads to process fragments.
1569  NextWorkIndex = 0;
1570  TrianglePrimitive Primitive = {A, B, C, VA, VB, VC};
1571  doWork([&]() {
1572  runTriangleFragmentWorker(Primitive, Cmd.getPipelineContext(), RPI,
1573  Viewport);
1574  });
1575 
1576  PendingFragments.clear();
1577 }
1578 
1579 void PipelineExecutor::signalError()
1580 {
1581  if (!IsWorkerThread)
1582  return;
1583 
1584  // Drop to interactive prompt.
1585  Continue = false;
1586  interact();
1587 }
1588 
1589 Vec4 PipelineExecutor::getPosition(const VertexOutput &Out)
1590 {
1591  Vec4 Pos;
1592  assert(Out.BuiltIns.count(SpvBuiltInPosition));
1593  const Object &PosObj = Out.BuiltIns.at(SpvBuiltInPosition);
1594  assert(PosObj.getType()->isVector() &&
1595  PosObj.getType()->getElementType()->isFloat() &&
1596  PosObj.getType()->getElementType()->getBitWidth() == 32 &&
1597  "Position built-in type must be float4");
1598  memcpy(&Pos, PosObj.getData(), sizeof(Vec4));
1599  return Pos;
1600 }
1601 
1602 // Load normalized vertex input data from memory.
1603 template <typename T>
1604 void loadNormalizedVertexInput(Object &Obj, VkFormat Format, const Memory &Mem,
1605  uint64_t Address)
1606 {
1607  assert(getElementSize(Format) <= (sizeof(T) * 4));
1608  T Data[4];
1609  Mem.load((uint8_t *)Data, Address, getElementSize(Format));
1610  for (uint32_t i = 0; i < (getElementSize(Format) / sizeof(T)); i++)
1611  Obj.set<float>(Data[i] / (float)std::numeric_limits<T>::max(), i);
1612 }
1613 
1614 void PipelineExecutor::loadVertexInput(const PipelineContext &PC,
1615  Memory *PipelineMemory, uint64_t Address,
1616  uint32_t VertexIndex,
1617  uint32_t InstanceIndex,
1618  uint32_t Location, uint32_t Component,
1619  const Type *ElemTy) const
1620 {
1621  const GraphicsPipeline *Pipeline = PC.getGraphicsPipeline();
1622  assert(Pipeline != nullptr);
1623 
1624  // Get vertex attribute description.
1625  auto &Attributes = Pipeline->getVertexAttributeDescriptions();
1626  auto Attr =
1627  std::find_if(Attributes.begin(), Attributes.end(),
1628  [Location](auto Elem) { return Elem.location == Location; });
1629  assert(Attr != Attributes.end() && "invalid attribute location");
1630 
1631  // Get vertex binding description.
1632  auto &Bindings = Pipeline->getVertexBindingDescriptions();
1633  auto Binding =
1634  std::find_if(Bindings.begin(), Bindings.end(),
1635  [Attr](auto Elem) { return Elem.binding == Attr->binding; });
1636  assert(Binding != Bindings.end() && "invalid binding number");
1637 
1638  // Calculate variable address in vertex buffer memory.
1639  uint64_t ElemAddr = PC.getVertexBindings().at(Attr->binding);
1640  switch (Binding->inputRate)
1641  {
1642  case VK_VERTEX_INPUT_RATE_VERTEX:
1643  ElemAddr += VertexIndex * Binding->stride;
1644  break;
1645  case VK_VERTEX_INPUT_RATE_INSTANCE:
1646  ElemAddr += InstanceIndex * Binding->stride;
1647  break;
1648  default:
1649  assert(false && "Unhandled vertex input rate");
1650  }
1651  ElemAddr += Attr->offset;
1652 
1653  // Add offset for requested component.
1654  if (Component)
1655  {
1656  assert(ElemTy->isScalar() || ElemTy->isVector());
1657  ElemAddr += Component * ElemTy->getScalarType()->getSize();
1658  }
1659 
1660  // Create object to hold converted vertex input values.
1661  Object Result(ElemTy);
1662 
1663  // Set default values for the variable.
1664  // As per the Vulkan specification, if the G, B, or A components are
1665  // missing, they should be filled with (0,0,1) as needed,
1666  Result.zero();
1667  if (ElemTy->isVector() && ElemTy->getElementCount() == 4)
1668  {
1669  const Type *ScalarTy = ElemTy->getElementType();
1670  if (ScalarTy->isFloat() && ScalarTy->getBitWidth() == 32)
1671  Result.set<float>(1.f, 3);
1672  else if (ScalarTy->isFloat() && ScalarTy->getBitWidth() == 64)
1673  Result.set<double>(1.0, 3);
1674  else if (ScalarTy->isInt() && ScalarTy->getBitWidth() == 16)
1675  Result.set<uint16_t>(1, 3);
1676  else if (ScalarTy->isInt() && ScalarTy->getBitWidth() == 32)
1677  Result.set<uint32_t>(1, 3);
1678  else if (ScalarTy->isInt() && ScalarTy->getBitWidth() == 64)
1679  Result.set<uint64_t>(1, 3);
1680  else
1681  assert(false && "Unhandled vertex input variable type");
1682  }
1683 
1684  switch (Attr->format)
1685  {
1686  case VK_FORMAT_R32_SINT:
1687  case VK_FORMAT_R32G32_SINT:
1688  case VK_FORMAT_R32G32B32_SINT:
1689  case VK_FORMAT_R32G32B32A32_SINT:
1690  case VK_FORMAT_R32_UINT:
1691  case VK_FORMAT_R32G32_UINT:
1692  case VK_FORMAT_R32G32B32_UINT:
1693  case VK_FORMAT_R32G32B32A32_UINT:
1694  case VK_FORMAT_R32_SFLOAT:
1695  case VK_FORMAT_R32G32_SFLOAT:
1696  case VK_FORMAT_R32G32B32_SFLOAT:
1697  case VK_FORMAT_R32G32B32A32_SFLOAT:
1698  // Copy vertex input data unmodified.
1699  Dev.getGlobalMemory().load(
1700  Result.getData(), ElemAddr,
1701  std::min(ElemTy->getSize(), (size_t)getElementSize(Attr->format)));
1702  break;
1703  case VK_FORMAT_R8_SNORM:
1704  case VK_FORMAT_R8G8_SNORM:
1705  case VK_FORMAT_R8G8B8_SNORM:
1706  case VK_FORMAT_R8G8B8A8_SNORM:
1707  loadNormalizedVertexInput<int8_t>(Result, Attr->format,
1708  Dev.getGlobalMemory(), ElemAddr);
1709  break;
1710  case VK_FORMAT_R16_SNORM:
1711  case VK_FORMAT_R16G16_SNORM:
1712  case VK_FORMAT_R16G16B16_SNORM:
1713  case VK_FORMAT_R16G16B16A16_SNORM:
1714  loadNormalizedVertexInput<int16_t>(Result, Attr->format,
1715  Dev.getGlobalMemory(), ElemAddr);
1716  break;
1717  case VK_FORMAT_R8_UNORM:
1718  case VK_FORMAT_R8G8_UNORM:
1719  case VK_FORMAT_R8G8B8_UNORM:
1720  case VK_FORMAT_R8G8B8A8_UNORM:
1721  loadNormalizedVertexInput<uint8_t>(Result, Attr->format,
1722  Dev.getGlobalMemory(), ElemAddr);
1723  break;
1724  case VK_FORMAT_R16_UNORM:
1725  case VK_FORMAT_R16G16_UNORM:
1726  case VK_FORMAT_R16G16B16_UNORM:
1727  case VK_FORMAT_R16G16B16A16_UNORM:
1728  loadNormalizedVertexInput<uint16_t>(Result, Attr->format,
1729  Dev.getGlobalMemory(), ElemAddr);
1730  break;
1731  default:
1732  std::cerr << "Unhandled vertex input format" << std::endl;
1733  abort();
1734  }
1735 
1736  // Store converted vertex data to pipeline memory.
1737  Result.store(*PipelineMemory, Address);
1738 }
1739 
1740 // Private functions for interactive execution and debugging.
1741 
/// Interactive debugger hook.
///
/// Does nothing unless Interactive is set. Otherwise it checks whether the
/// current instruction matches a breakpoint, and unless the Continue flag is
/// still set afterwards, prints the current context and reads commands from
/// standard input until one of them asks to resume execution.
void PipelineExecutor::interact()
{
  if (!Interactive)
    return;

  // Check if a breakpoint has been reached.
  const Instruction *CI = CurrentInvocation->getCurrentInstruction();
  if (CI && CI->getResultType() &&
      CurrentInvocation->getState() != Invocation::BARRIER)
  {
    // Operand 1 is compared against the breakpoint result IDs (presumably it
    // holds the result ID of instructions that have a result type).
    uint32_t ResultId = CI->getOperand(1);
    auto BP = std::find_if(
        Breakpoints.begin(), Breakpoints.end(),
        [ResultId](const auto &BP) { return BP.second == ResultId; });
    if (BP != Breakpoints.end())
    {
      std::cout << "Breakpoint " << BP->first << " hit by invocation "
                << CurrentInvocation->getGlobalId() << std::endl;
      Continue = false;
    }
  }

  // Keep going if user used 'continue'.
  if (Continue)
    return;

  printContext();

  // Loop until the user enters a command that resumes execution.
  bool IsTTY = isatty(STDIN_FILENO) == 1;
  while (true)
  {
    // Get line of user input.
    bool eof = false;
    std::string Line;
#if HAVE_READLINE
    if (IsTTY)
    {
      // readline() returns a null pointer at end of input.
      char *CLine = readline("(talvos) ");
      if (CLine)
      {
        Line = CLine;
        free(CLine);
      }
      else
        eof = true;
    }
    else
#endif
    {
      // Plain stream input; the prompt is only shown on a terminal.
      if (IsTTY)
        std::cout << "(talvos) " << std::flush;
      getline(std::cin, Line);
      eof = std::cin.eof();
    }

    // Quit on EOF.
    if (eof)
    {
      if (IsTTY)
        std::cout << "(quit)" << std::endl;
      quit({});
      return;
    }

    // Split line into tokens.
    std::istringstream ISS(Line);
    std::vector<std::string> Tokens{std::istream_iterator<std::string>{ISS},
                                    std::istream_iterator<std::string>{}};
    if (!Tokens.size())
    {
      // Repeat last command if possible, otherwise skip.
      if (!LastLine.size())
        continue;
      Tokens = LastLine;
    }
    else
    {
      // Save tokens for repeating command.
      LastLine = Tokens;
#if HAVE_READLINE
      add_history(Line.c_str());
#endif
    }

    // Dispatch on the first token. A handler that returns true leaves the
    // prompt loop (execution resumes); one that returns false prompts again.
#define CMD(LONG, SHORT, FUNC)                                                 \
  if (Tokens[0] == LONG || Tokens[0] == SHORT)                                 \
  {                                                                            \
    if (FUNC(Tokens))                                                          \
      break;                                                                   \
    else                                                                       \
      continue;                                                                \
  }
    CMD("break", "b", brk);
    CMD("breakpoint", "bp", breakpoint);
    CMD("continue", "c", cont);
    CMD("help", "h", help);
    CMD("print", "p", print);
    CMD("quit", "q", quit);
    CMD("step", "s", step);
    CMD("switch", "sw", swtch);
#undef CMD
    // Only reached when no command matched.
    std::cerr << "Unrecognized command '" << Tokens[0] << "'" << std::endl;
  }
}
1848 
1849 void PipelineExecutor::printContext() const
1850 {
1851  assert(CurrentInvocation);
1852  if (CurrentInvocation->getState() == Invocation::FINISHED)
1853  std::cout << " <finished>" << std::endl;
1854  else
1855  {
1856  const Instruction *CI = CurrentInvocation->getCurrentInstruction();
1857  const Instruction *I = CI;
1858 
1859  // Print set of instructions around current location.
1860  // TODO: Show instructions in adjacent blocks?
1861  int i;
1862  for (i = 0; i > -CONTEXT_SIZE; i--)
1863  {
1864  if (!I->previous())
1865  break;
1866  I = I->previous();
1867  }
1868  for (; i < CONTEXT_SIZE + 1; i++)
1869  {
1870  if (CI == I)
1871  {
1872  std::cout << "-> ";
1873  if (CurrentInvocation->getState() == Invocation::BARRIER)
1874  std::cout << " <barrier>" << std::endl << " ";
1875  }
1876  else
1877  std::cout << " ";
1878 
1879  I->print(std::cout);
1880  std::cout << std::endl;
1881 
1882  I = I->next();
1883  if (!I)
1884  break;
1885  }
1886  }
1887 }
1888 
1889 bool PipelineExecutor::brk(const std::vector<std::string> &Args)
1890 {
1891  if (Args.size() != 2)
1892  {
1893  std::cerr << "Usage: break %id" << std::endl;
1894  return false;
1895  }
1896 
1897  // Parse target result ID.
1898  char *Next;
1899  uint32_t Id = (uint32_t)strtoul(Args[1].c_str() + 1, &Next, 10);
1900  if (Args[1][0] != '%' || strlen(Next))
1901  {
1902  std::cerr << "Invalid result ID '" << Args[1] << "'" << std::endl;
1903  return false;
1904  }
1905 
1906  // Set breakpoint.
1907  Breakpoints[NextBreakpoint] = Id;
1908  std::cout << "Breakpoint " << NextBreakpoint << " set for result ID %" << Id
1909  << std::endl;
1910  NextBreakpoint++;
1911 
1912  return false;
1913 }
1914 
1915 bool PipelineExecutor::breakpoint(const std::vector<std::string> &Args)
1916 {
1917  if (Args.size() < 2)
1918  {
1919  std::cerr << "Usage: breakpoint [clear|delete|list]" << std::endl;
1920  return false;
1921  }
1922 
1923  if (Args[1] == "clear")
1924  {
1925  Breakpoints.clear();
1926  std::cout << "All breakpoints cleared." << std::endl;
1927  }
1928  else if (Args[1] == "delete")
1929  {
1930  if (Args.size() != 3)
1931  {
1932  std::cerr << "Usage: breakpoint delete ID" << std::endl;
1933  return false;
1934  }
1935 
1936  // Parse breakpoint ID.
1937  char *Next;
1938  uint32_t Id = (uint32_t)strtoul(Args[2].c_str(), &Next, 10);
1939  if (strlen(Next) || !Breakpoints.count(Id))
1940  {
1941  std::cerr << "Invalid breakpoint ID '" << Args[2] << "'" << std::endl;
1942  return false;
1943  }
1944 
1945  Breakpoints.erase(Id);
1946  std::cout << "Breakpoint " << Id << " deleted." << std::endl;
1947  }
1948  else if (Args[1] == "list")
1949  {
1950  if (Breakpoints.empty())
1951  std::cout << "No breakpoints." << std::endl;
1952  else
1953  {
1954  for (auto &BP : Breakpoints)
1955  std::cout << "Breakpoint " << BP.first << ": %" << BP.second
1956  << std::endl;
1957  }
1958  }
1959  else
1960  std::cerr << "Usage: breakpoint [clear|delete|list]" << std::endl;
1961 
1962  return false;
1963 }
1964 
1965 bool PipelineExecutor::cont(const std::vector<std::string> &Args)
1966 {
1967  Continue = true;
1968  return true;
1969 }
1970 
1971 bool PipelineExecutor::help(const std::vector<std::string> &Args)
1972 {
1973  std::cout << "Command list:" << std::endl;
1974  std::cout << " break (b)" << std::endl;
1975  std::cout << " breakpoint (bp)" << std::endl;
1976  std::cout << " continue (c)" << std::endl;
1977  std::cout << " help (h)" << std::endl;
1978  std::cout << " print (p)" << std::endl;
1979  std::cout << " quit (q)" << std::endl;
1980  std::cout << " step (s)" << std::endl;
1981  std::cout << " switch (sw)" << std::endl;
1982  // TODO: help for specific commands
1983  // std::cout << "(type 'help <command>' for more information)" << std::endl;
1984 
1985  return false;
1986 }
1987 
1988 bool PipelineExecutor::print(const std::vector<std::string> &Args)
1989 {
1990  if (Args.size() != 2)
1991  {
1992  std::cerr << "Usage: print %<id>" << std::endl;
1993  return false;
1994  }
1995 
1996  // Parse result ID.
1997  char *Next;
1998  uint32_t Id = (uint32_t)strtoul(Args[1].c_str() + 1, &Next, 10);
1999  if (Args[1][0] != '%' || strlen(Next))
2000  {
2001  std::cerr << "Invalid result ID" << std::endl;
2002  return false;
2003  }
2004 
2005  std::cout << " %" << std::dec << Id << " = ";
2006 
2007  // Handle types.
2008  if (const Type *Ty = CurrentStage->getModule()->getType(Id))
2009  {
2010  std::cout << Ty << std::endl;
2011  return false;
2012  }
2013 
2014  // Print object value for current invocation.
2015  std::cout << CurrentInvocation->getObject(Id) << std::endl;
2016 
2017  return false;
2018 }
2019 
2020 bool PipelineExecutor::quit(const std::vector<std::string> &Args) { exit(0); }
2021 
2022 bool PipelineExecutor::step(const std::vector<std::string> &Args)
2023 {
2024  if (CurrentInvocation->getState() == Invocation::FINISHED)
2025  {
2026  std::cout << "Invocation has finished." << std::endl;
2027  return false;
2028  }
2029  else if (CurrentInvocation->getState() == Invocation::BARRIER)
2030  {
2031  std::cout << "Invocation is at a barrier." << std::endl;
2032  return false;
2033  }
2034 
2035  return true;
2036 }
2037 
2038 bool PipelineExecutor::swtch(const std::vector<std::string> &Args)
2039 {
2040  // TODO: Implement switch for vertex/fragment shaders.
2041  if (CurrentCommand->getType() != Command::DISPATCH)
2042  {
2043  std::cerr << "switch not implemented for this command." << std::endl;
2044  return false;
2045  }
2046 
2047  // TODO: Allow `select group X Y Z` or `select local X Y Z` as well?
2048  if (Args.size() < 2 || Args.size() > 4)
2049  {
2050  std::cerr << "Usage: switch X [Y [Z]]" << std::endl;
2051  return false;
2052  }
2053 
2054  // Parse global invocation ID.
2055  Dim3 Id(0, 0, 0);
2056  for (unsigned i = 1; i < Args.size(); i++)
2057  {
2058  char *Next;
2059  Id[i - 1] = (uint32_t)strtoul(Args[i].c_str(), &Next, 10);
2060  if (strlen(Next))
2061  {
2062  std::cerr << "Invalid global ID '" << Args[i] << "'" << std::endl;
2063  return false;
2064  }
2065  }
2066 
2067  // Check global index is within global bounds.
2068  Dim3 GroupSize = CurrentStage->getGroupSize();
2069  Dim3 NumGroups = ((const DispatchCommand *)CurrentCommand)->getNumGroups();
2070  if (Id.X >= GroupSize.X * NumGroups.X || Id.Y >= GroupSize.Y * NumGroups.Y ||
2071  Id.Z >= GroupSize.Z * NumGroups.Z)
2072  {
2073  std::cerr << "Global ID is out of the bounds of the current dispatch."
2074  << std::endl;
2075  return false;
2076  }
2077 
2078  // Check if we are already executing the target invocation.
2079  if (CurrentInvocation->getGlobalId() == Id)
2080  {
2081  std::cerr << "Already executing this invocation!" << std::endl;
2082  return false;
2083  }
2084 
2085  // Find workgroup with target group ID.
2086  Dim3 GroupId(Id.X / GroupSize.X, Id.Y / GroupSize.Y, Id.Z / GroupSize.Z);
2087  Workgroup *Group = nullptr;
2088  if (GroupId == CurrentGroup->getGroupId())
2089  {
2090  // Already running - nothing to do.
2091  Group = CurrentGroup;
2092  }
2093  if (!Group)
2094  {
2095  // Check running groups list.
2096  auto RG = std::find_if(
2097  RunningGroups.begin(), RunningGroups.end(),
2098  [&GroupId](const Workgroup *G) { return G->getGroupId() == GroupId; });
2099  if (RG != RunningGroups.end())
2100  {
2101  // Remove from running groups.
2102  Group = *RG;
2103  RunningGroups.erase(RG);
2104  }
2105  }
2106  if (!Group)
2107  {
2108  // Check pending groups list.
2109  auto PG = std::find(PendingGroups.begin() + NextWorkIndex,
2110  PendingGroups.end(), GroupId);
2111  if (PG != PendingGroups.end())
2112  {
2113  // Remove from pending groups and create the new workgroup.
2114  Group = createWorkgroup(*PG);
2115  Dev.reportWorkgroupBegin(Group);
2116  PendingGroups.erase(PG);
2117  }
2118  }
2119 
2120  if (!Group)
2121  {
2122  std::cerr << "Workgroup containing invocation has already finished."
2123  << std::endl;
2124  return false;
2125  }
2126 
2127  // Switch to target group.
2128  if (Group != CurrentGroup)
2129  {
2130  RunningGroups.push_back(CurrentGroup);
2131  CurrentGroup = Group;
2132  }
2133 
2134  // Switch to target invocation.
2135  Dim3 LocalId(Id.X % GroupSize.X, Id.Y % GroupSize.Y, Id.Z % GroupSize.Z);
2136  uint32_t LocalIndex =
2137  LocalId.X + (LocalId.Y + LocalId.Z * GroupSize.Y) * GroupSize.X;
2138  CurrentInvocation = CurrentGroup->getWorkItems()[LocalIndex].get();
2139 
2140  std::cout << "Switched to invocation with global ID " << Id << std::endl;
2141 
2142  printContext();
2143 
2144  return false;
2145 }
2146 
2147 } // namespace talvos
This class represents a module-scope variable declaration.
Definition: Variable.h:21
uint32_t getSubpassIndex() const
Returns the index of the current subpass.
Definition: RenderPass.h:116
std::vector< Workgroup * > RunningGroups
Pool of groups that have begun execution and been suspended.
uint32_t Y
Framebuffer y-coordinate.
This file declares the Workgroup class.
const Subpass & getSubpass(uint32_t Index) const
Returns the subpass at index Index.
Definition: RenderPass.cpp:23
This file declares the ComputePipeline class.
bool isScalar() const
Returns true if this is a scalar type.
Definition: Type.cpp:77
const GraphicsPipeline * getGraphicsPipeline() const
Returns the graphics pipeline.
const PipelineStage * CurrentStage
The pipeline stage currently being executed.
unsigned long getEnvUInt(const char *Name, unsigned Default)
Returns the integer value for the environment variable Name, or Default if it is not set.
Definition: Utils.cpp:33
This file declares the Device class.
bool isFloat() const
Returns true if this is a floating point type.
Definition: Type.h:107
void setDescriptorElements(const DescriptorElement *DAE)
Set the descriptor array elements for this object.
Definition: Object.cpp:304
Dim3 getGroupId() const
Returns the group ID of this workgroup.
Definition: Workgroup.h:52
#define DISPATCH(Op, Func)
size_t getSize() const
Returns the size of this type in bytes.
Definition: Type.h:81
const PipelineStage * getStage() const
Returns the pipeline stage.
Only allow Device objects to create PipelineExecutor instances.
uint32_t Y
Definition: Dim3.h:30
void run(const DispatchCommand &Cmd)
Run a compute dispatch command to completion.
State getState() const
Returns the state of this invocation.
bool isWorkerThread() const
Returns true if the calling thread is a PipelineExecutor worker thread.
std::map< SpvBuiltIn, Object > BuiltIns
BuiltIn variables.
void step()
Step this invocation by executing the next instruction.
void release(uint64_t Address)
Release the allocation with base address Address.
Definition: Memory.cpp:292
uint32_t X
Framebuffer x-coordinate.
const Command * CurrentCommand
The command currently being executed.
const VkPipelineRasterizationStateCreateInfo & getRasterizationState() const
Returns the rasterization state used by this pipeline.
This class encapsulates information about an indexed draw command.
Definition: Commands.h:395
bool ShutDownWorkers
Signal to shut down worker threads.
bool hasAlphaChannel(VkFormat Format)
Returns true if Format includes an alpha channel.
Definition: Image.cpp:889
~PipelineExecutor()
Destroy a pipeline executor.
Type getType() const
Returns the type of this command.
Definition: Commands.h:58
This file declares the Module class.
size_t getElementOffset(uint64_t Index) const
Returns the byte offset of the element at Index.
Definition: Type.cpp:26
const Type * getElementType(uint64_t Index=0) const
Returns the type of the element at Index.
Definition: Type.cpp:38
const Type * getScalarType() const
Returns the element type for vector types, or this for scalar types.
Definition: Type.cpp:49
const DescriptorSetMap & getGraphicsDescriptors() const
Returns the descriptor bindings for draw commands.
bool isInt() const
Returns true if this is an integer type.
Definition: Type.h:110
const VertexOutput & OutC
The vertex shader outputs for vertex C.
uint32_t getHeight() const
Returns the height of this framebuffer in pixels.
Definition: RenderPass.h:38
void initializeVariables(const DescriptorSetMap &DSM, uint64_t PushConstantAddress)
Initialize variables.
static std::map< uint32_t, uint32_t > Breakpoints
Map from breakpoint ID to instruction result ID.
float YFBToDev(float Yfb, VkViewport Viewport)
T get(unsigned C) const
Get a component value from the texel.
Definition: Image.h:44
This class represents a view into a range of image subresources.
Definition: Image.h:178
bool checkEnv(const char *Name, bool Default)
Returns true if the environment variable Name is set to 1, false for 0, or Default if it is not set.
Definition: Utils.cpp:16
This file declares the EntryPoint class.
float XFBToDev(float Xfb, VkViewport Viewport)
void set(unsigned C, T Value)
Set a component value in the texel.
Definition: Image.h:71
uint32_t getElementSize(VkFormat Format)
Returns the size in bytes for each element of an image with type Format.
Definition: Image.cpp:651
const Invocation * getCurrentInvocation() const
Returns the current invocation being executed.
uint32_t getIndexOffset() const
Returns the offset of the first index.
Definition: Commands.h:424
This class encapsulates information about a compute kernel launch.
Definition: Commands.h:278
void store(Memory &Mem, uint64_t Address) const
Store the value of this object to memory at Address.
Definition: Object.cpp:317
#define CMD(LONG, SHORT, FUNC)
const Framebuffer & getFramebuffer() const
Returns the framebuffer associated with this render pass instance.
Definition: RenderPass.h:110
const EntryPoint * getEntryPoint() const
Return the entry point this pipeline stage will invoke.
Definition: PipelineStage.h:51
bool isMatrix() const
Returns true if this is a matrix type.
Definition: Type.h:113
uint32_t getOperand(unsigned i) const
Returns the operand at index i.
Definition: Instruction.h:52
void addWorkItem(std::unique_ptr< Invocation > WorkItem)
Add a work-item invocation to this group, transferring ownership.
Definition: Workgroup.cpp:46
uint64_t getIndexBaseAddress() const
Returns the address in memory of the indices.
Definition: Commands.h:421
const RenderPassInstance & getRenderPassInstance() const
Returns the render pass instance used by this command.
Definition: Commands.h:344
const VertexOutput & OutA
The vertex shader outputs for vertex A.
const Workgroup * getCurrentWorkgroup() const
Returns the current workgroup being executed.
void set(T Value, uint32_t Element=0)
Set the value of this object to a scalar of type T.
Definition: Object.cpp:295
std::condition_variable WorkerSignal
Condition variable used to wake worker threads.
const std::map< uint32_t, uint32_t > & getStructMemberDecorations(uint32_t Index) const
Returns the decoration map for the structure member at Index.
Definition: Type.cpp:67
This class represents an instance of a render pass being used for drawing.
Definition: RenderPass.h:95
This class encapsulates pipeline state and bound resources.
const BlendAttachmentStateList & getBlendAttachmentStates() const
Returns the list of blend attachment states.
#define CONTEXT_SIZE
The number of lines before and after the current instruction to print.
const ComputePipeline * getComputePipeline() const
Returns the compute pipeline.
Dim3 getNumGroups() const
Returns the number of workgroups this command launches.
Definition: Commands.h:294
const PipelineContext & getPipelineContext() const
Returns the pipeline context.
Definition: Commands.h:341
uint32_t Z
Definition: Dim3.h:30
This class represents a single execution of a SPIR-V entry point.
Definition: Invocation.h:33
This class represents a single texel with four 32-bit component values.
Definition: Image.h:28
uint32_t getElementCount() const
Returns the number of elements in this array, struct, or vector type.
Definition: Type.h:64
const VertexOutput & OutB
The vertex shader outputs for vertex B.
std::vector< Object > Objects
The initial object values for each invocation.
This file declares miscellaneous utilities used internally by libtalvos.
This file declares the Instruction class.
Triangle primitive data, used for rasterization.
Workgroup * createWorkgroup(Dim3 GroupId) const
Create a compute shader workgroup and its work-item invocations.
This file declares the Type class.
float YDevToFB(float Yd, VkViewport Viewport)
Class representing a 3-dimensional size or ID.
Definition: Dim3.h:22
std::map< uint32_t, talvos::DescriptorSet > DescriptorSetMap
Mapping from set numbers to descriptor sets.
std::map< std::pair< uint32_t, uint32_t >, Object > Locations
Location variables (key is {Location, Component}).
This is an abstract base class for draw commands.
Definition: Commands.h:311
const std::vector< VkRect2D > & getScissors() const
Returns the scissor rectangles.
VkIndexType getIndexType() const
Returns the type of the indices.
Definition: Commands.h:427
const VertexOutput & Out
The vertex shader output.
void interpolate(Object &Output, const Type *Ty, size_t Offset, const Object &FA, const Object &FB, const Object &FC, float AW, float BW, float CW, float InvW, float a, float b, float c, bool Flat, bool Perspective)
Recursively populate a fragment shader input variable by interpolating between the vertex shader outputs.
uint32_t getInstanceOffset() const
Returns the offset of the first instance.
Definition: Commands.h:332
This class represents a framebuffer that can be used for rendering.
Definition: RenderPass.h:22
VkPrimitiveTopology getTopology() const
Returns the primitive topology used by this pipeline.
std::mutex WorkerMutex
Mutex used to synchronize with worker threads.
This file declares the PipelineExecutor class.
uint32_t getWidth() const
Returns the width of this framebuffer in pixels.
Definition: RenderPass.h:44
const PipelineStage * getFragmentStage() const
Returns the fragment pipeline stage.
std::vector< VkPipelineColorBlendAttachmentState > BlendAttachmentStateList
A list of pipeline color blend attachment states.
This class represents an address space in the virtual device.
Definition: Memory.h:37
void print(std::ostream &O, bool Align=true) const
Print a human-readable form of this instruction to O.
Definition: Instruction.cpp:43
const PipelineContext & getPipelineContext() const
Returns the pipeline context.
Definition: Commands.h:297
Structure used to hold information about an element of a descriptor array.
Definition: Object.h:42
const VertexAttributeDescriptionList & getVertexAttributeDescriptions() const
Returns the list of vertex attribute descriptions.
This file declares the PipelineStage class.
Device & Dev
The device this shader is executing on.
const std::array< float, 4 > & getBlendConstants() const
Returns the blend constants.
This file declares data structures and functions for handling images.
This class encapsulates a graphics pipeline.
This class represents a Vulkan render pass.
Definition: RenderPass.h:69
bool wasDiscarded() const
Returns true if this invocation has been discarded with OpKill.
Definition: Invocation.h:93
uint32_t getBitWidth() const
Returns the bit-width of this type.
Definition: Type.cpp:20
uint32_t getStorageClass() const
Returns the storage class of this type.
Definition: Type.cpp:60
void zero()
Set all of the value bits in this object to zero.
Definition: Object.cpp:367
A Device instance encapsulates properties and state for the virtual device.
Definition: Device.h:29
const std::vector< ImageView * > & getAttachments() const
Returns the list of attachments backing this framebuffer.
Definition: RenderPass.h:32
Outputs from a vertex shading stage.
This file declares the RenderPass class and related data structures.
uint32_t X
Definition: Dim3.h:30
This class represents a workgroup executing a compute command.
Definition: Workgroup.h:27
float X
The framebuffer x-coordinate.
const VariableList & getVariables() const
Returns the input/output variables used by this entry point.
Definition: EntryPoint.h:47
This file declares the GraphicsPipeline class.
uint8_t * getData()
Returns a mutable pointer to the raw data backing this object.
Definition: Object.h:88
const Instruction * previous() const
Get the previous instruction in the containing block.
Definition: Instruction.h:73
Point primitive data, used for rasterization.
const std::vector< Object > & getObjects() const
Returns a list of all result objects in this pipeline stage.
Definition: PipelineStage.h:60
const Type * getResultType() const
Returns the result type of this instruction, or nullptr if it does not produce a result.
Definition: Instruction.h:59
float Y
The framebuffer y-coordinate.
const RenderPass & getRenderPass() const
Returns the render pass.
Definition: RenderPass.h:113
This file declares the Memory class.
std::vector< std::thread > WorkerThreads
List of worker threads.
void loadNormalizedVertexInput(Object &Obj, VkFormat Format, const Memory &Mem, uint64_t Address)
Memory & getGlobalMemory()
Get the global memory instance associated with this device.
Definition: Device.h:42
void blendTexel(Image::Texel &NewTexel, const Image::Texel &OldTexel, const VkPipelineColorBlendAttachmentState &Blend, const std::array< float, 4 > &BlendConstants)
const DescriptorSetMap & getComputeDescriptors() const
Returns the descriptor bindings for compute commands.
TypeId getTypeId() const
Returns the type ID of this type.
Definition: Type.h:92
uint64_t NumBytes
Size of descriptor element.
Definition: Object.h:45
uint32_t getNumInstances() const
Returns the number of instances.
Definition: Commands.h:335
void load(uint8_t *Result, uint64_t Address, uint64_t NumBytes) const
Load NumBytes of data from Address into Result.
Definition: Memory.cpp:249
const Instruction * getCurrentInstruction() const
Returns the instruction that this invocation is executing.
Definition: Invocation.h:74
PipelineExecutor(PipelineExecutorKey Key, Device &Dev)
Create a pipeline executor on Dev.
Dim3 getGroupSize() const
Return the workgroup size.
Definition: PipelineStage.h:54
This file declares the Command base class and its subclasses.
bool isThreadSafe() const
Returns true if all of the loaded plugins are thread-safe.
Definition: Device.cpp:132
This class represents a SPIR-V type.
Definition: Type.h:33
float InvW
Inverse of the interpolated clip w coordinate.
uint32_t Data[3]
Definition: Dim3.h:32
This file declares the Invocation class.
Dim3 getGlobalId() const
Returns the global invocation ID.
Definition: Invocation.h:80
const VertexBindingDescriptionList & getVertexBindingDescriptions() const
Returns the list of vertex binding descriptions.
uint64_t allocate(uint64_t NumBytes)
Allocate a new buffer of size NumBytes.
Definition: Memory.cpp:52
Object getObject(uint32_t Id) const
Returns the object with the specified ID.
uint32_t getNumAttachments() const
Returns the number of attachments in this render pass.
Definition: RenderPass.h:81
This class represents an instruction result.
Definition: Object.h:51
void store(uint64_t Address, uint64_t NumBytes, const uint8_t *Data)
Store NumBytes of data from Data to Address.
Definition: Memory.cpp:306
const std::vector< VkViewport > & getViewports() const
Returns the viewports.
static const uint32_t PUSH_CONSTANT_MEM_SIZE
The number of bytes used for push constant data.
const Instruction * next() const
Get the next instruction in the containing block.
Definition: Instruction.h:68
std::vector< std::unique_ptr< Invocation > > WorkItemList
List of work items in the workgroup.
Definition: Workgroup.h:31
const PipelineStage * getVertexStage() const
Returns the vertex pipeline stage.
This class represents a SPIR-V instruction.
Definition: Instruction.h:27
const uint8_t * getPushConstantData() const
Returns a pointer to the push constant data.
static uint32_t NextBreakpoint
Index of the next breakpoint to create.
uint32_t getNumVertices() const
Returns the number of vertices.
Definition: Commands.h:338
This file declares the Variable class.
float XDevToFB(float Xd, VkViewport Viewport)
bool Continue
True when the user has used continue command.
std::vector< Dim3 > PendingGroups
Pool of group IDs pending creation and execution.
bool isVector() const
Returns true if this is a vector type.
Definition: Type.h:125
This class encapsulates a compute pipeline.
unsigned NumThreads
The number of worker threads currently executing.
State to be carried through the execution of a render pipeline.
uint64_t Address
Address of descriptor element.
Definition: Object.h:44
Dim3 getBaseGroup() const
Returns the base workgroup offset used by this command.
Definition: Commands.h:291
bool Interactive
True when interactive mode is enabled.
const WorkItemList & getWorkItems() const
Return the list of work items in this workgroup.
Definition: Workgroup.h:58
void print(std::ostream &Stream, uint8_t *Data, const Type *Ty)
Recursively print typed data to a stream.
Definition: Object.cpp:196
std::vector< VertexOutput > VertexOutputs
The outputs from the vertex shading stage.
const VertexBindingMap & getVertexBindings() const
Returns the vertex bindings.
Internal structure to hold fragment data.
uint32_t getVertexOffset() const
Returns the offset of the first vertex.
Definition: Commands.h:347