Skip to content

Commit 81ffe74

Browse files
committed
Compute Shader Support
1 parent 2016e42 commit 81ffe74

7 files changed

Lines changed: 232 additions & 2 deletions

File tree

include/gx2/draw.h

100644 → 100755
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,21 @@
1212
extern "C" {
1313
#endif
1414

15+
typedef struct GX2DispatchParams GX2DispatchParams;
16+
17+
struct GX2DispatchParams
18+
{
19+
uint32_t numGroupsX;
20+
uint32_t numGroupsY;
21+
uint32_t numGroupsZ;
22+
uint32_t _padding;
23+
};
24+
WUT_CHECK_OFFSET(GX2DispatchParams, 0x00, numGroupsX);
25+
WUT_CHECK_OFFSET(GX2DispatchParams, 0x04, numGroupsY);
26+
WUT_CHECK_OFFSET(GX2DispatchParams, 0x08, numGroupsZ);
27+
WUT_CHECK_OFFSET(GX2DispatchParams, 0x0C, _padding);
28+
WUT_CHECK_SIZE(GX2DispatchParams, 0x10);
29+
1530
void
1631
GX2SetAttribBuffer(uint32_t index,
1732
uint32_t size,
@@ -59,6 +74,9 @@ GX2DrawIndexedImmediateEx(GX2PrimitiveMode mode,
5974
void
6075
GX2SetPrimitiveRestartIndex(uint32_t index);
6176

77+
void
78+
GX2DispatchCompute(GX2DispatchParams *dispatchParams);
79+
6280
#ifdef __cplusplus
6381
}
6482
#endif

include/gx2/shaders.h

100644 → 100755
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ extern "C" {
1818
typedef struct GX2AttribVar GX2AttribVar;
1919
typedef struct GX2AttribStream GX2AttribStream;
2020
typedef struct GX2FetchShader GX2FetchShader;
21+
typedef struct GX2ComputeShader GX2ComputeShader;
2122
typedef struct GX2GeometryShader GX2GeometryShader;
2223
typedef struct GX2LoopVar GX2LoopVar;
2324
typedef struct GX2PixelShader GX2PixelShader;
@@ -336,6 +337,57 @@ WUT_CHECK_OFFSET(GX2GeometryShader, 0x90, streamOutStride);
336337
WUT_CHECK_OFFSET(GX2GeometryShader, 0xA0, gx2rBuffer);
337338
WUT_CHECK_SIZE(GX2GeometryShader, 0xB0);
338339

340+
struct GX2ComputeShader
341+
{
342+
uint32_t regs[12];
343+
344+
uint32_t size;
345+
void *program;
346+
347+
uint32_t uniformBlockCount;
348+
GX2UniformBlock *uniformBlocks;
349+
350+
uint32_t uniformVarCount;
351+
GX2UniformVar *uniformVars;
352+
353+
uint32_t initialValueCount;
354+
GX2UniformInitialValue *initialValues;
355+
356+
uint32_t loopVarCount;
357+
GX2LoopVar *loopVars;
358+
359+
uint32_t samplerVarCount;
360+
GX2SamplerVar *samplerVars;
361+
362+
uint32_t workgroupSizeX;
363+
uint32_t workgroupSizeY;
364+
uint32_t workgroupSizeZ;
365+
BOOL over64Mode;
366+
uint32_t numWavesPerSimd;
367+
368+
GX2RBuffer gx2rBuffer;
369+
};
370+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x00, regs);
371+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x30, size);
372+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x34, program);
373+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x38, uniformBlockCount);
374+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x3C, uniformBlocks);
375+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x40, uniformVarCount);
376+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x44, uniformVars);
377+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x48, initialValueCount);
378+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x4C, initialValues);
379+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x50, loopVarCount);
380+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x54, loopVars);
381+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x58, samplerVarCount);
382+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x5C, samplerVars);
383+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x60, workgroupSizeX);
384+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x64, workgroupSizeY);
385+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x68, workgroupSizeZ);
386+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x6C, over64Mode);
387+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x70, numWavesPerSimd);
388+
WUT_CHECK_OFFSET(GX2ComputeShader, 0x74, gx2rBuffer);
389+
WUT_CHECK_SIZE(GX2ComputeShader, 0x84);
390+
339391
struct GX2AttribStream
340392
{
341393
uint32_t location;
@@ -388,6 +440,9 @@ GX2SetPixelShader(const GX2PixelShader *shader);
388440
void
389441
GX2SetGeometryShader(const GX2GeometryShader *shader);
390442

443+
void
444+
GX2SetComputeShader(const GX2ComputeShader *shader);
445+
391446
void
392447
GX2SetVertexSampler(const GX2Sampler *sampler,
393448
uint32_t id);
@@ -400,6 +455,10 @@ void
400455
GX2SetGeometrySampler(const GX2Sampler *sampler,
401456
uint32_t id);
402457

458+
void
459+
GX2SetComputeSampler(const GX2Sampler *sampler,
460+
uint32_t id);
461+
403462
void
404463
GX2SetVertexUniformReg(uint32_t offset,
405464
uint32_t count,
@@ -425,6 +484,11 @@ GX2SetGeometryUniformBlock(uint32_t location,
425484
uint32_t size,
426485
const void *data);
427486

487+
void
488+
GX2SetComputeUniformBlock(uint32_t location,
489+
uint32_t size,
490+
const void *data);
491+
428492
void
429493
GX2SetShaderModeEx(GX2ShaderMode mode,
430494
uint32_t numVsGpr,
@@ -502,6 +566,19 @@ GX2GetVertexUniformBlock(const GX2VertexShader *shader,
502566
return NULL;
503567
}
504568

569+
static inline GX2UniformBlock *
570+
GX2GetComputeUniformBlock(const GX2ComputeShader *shader,
571+
const char *name)
572+
{
573+
for (uint32_t i = 0; i < shader->uniformBlockCount; ++i) {
574+
if (strcmp(name, shader->uniformBlocks[i].name) == 0) {
575+
return &shader->uniformBlocks[i];
576+
}
577+
}
578+
579+
return NULL;
580+
}
581+
505582
static inline GX2UniformVar *
506583
GX2GetGeometryUniformVar(const GX2GeometryShader *shader,
507584
const char *name)
@@ -541,6 +618,19 @@ GX2GetVertexUniformVar(const GX2VertexShader *shader,
541618
return NULL;
542619
}
543620

621+
static inline GX2UniformVar *
622+
GX2GetComputeUniformVar(const GX2ComputeShader *shader,
623+
const char *name)
624+
{
625+
for (uint32_t i = 0; i < shader->uniformVarCount; ++i) {
626+
if (strcmp(name, shader->uniformVars[i].name) == 0) {
627+
return &shader->uniformVars[i];
628+
}
629+
}
630+
631+
return NULL;
632+
}
633+
544634
static inline void
545635
GX2SetShaderMode(GX2ShaderMode mode)
546636
{

include/gx2/texture.h

100644 → 100755
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ void
4949
GX2SetGeometryTexture(const GX2Texture *texture,
5050
uint32_t unit);
5151

52+
void
53+
GX2SetComputeTexture(const GX2Texture *texture,
54+
uint32_t unit);
55+
5256
#ifdef __cplusplus
5357
}
5458
#endif

libraries/libgfd/include/gfd.h

100644 → 100755
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,23 @@ WUT_CHECK_SIZE(GFDRelocationHeader, 0x28);
111111
char *
112112
GFDGetLastErrorString();
113113

114+
uint32_t
115+
GFDGetComputeShaderCount(const void *file);
116+
117+
uint32_t
118+
GFDGetComputeShaderHeaderSize(uint32_t index,
119+
const void *file);
120+
121+
uint32_t
122+
GFDGetComputeShaderProgramSize(uint32_t index,
123+
const void *file);
124+
125+
BOOL
126+
GFDGetComputeShader(GX2ComputeShader *shader,
127+
void *program,
128+
uint32_t index,
129+
const void *file);
130+
114131
uint32_t
115132
GFDGetGeometryShaderCount(const void *file);
116133

libraries/libgfd/src/gfd.c

100644 → 100755
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -559,13 +559,16 @@ GFDGetComputeShaderProgramSize(uint32_t index,
559559
file);
560560
}
561561

562-
/*
563562
BOOL
564563
GFDGetComputeShader(GX2ComputeShader *shader,
565564
void *program,
566565
uint32_t index,
567566
const void *file)
568567
{
568+
if (!_GFDCheckShaderAlign(program)) {
569+
return FALSE;
570+
}
571+
569572
return _GFDGetGenericBlock(GFD_BLOCK_COMPUTE_SHADER_HEADER,
570573
shader,
571574
GFD_BLOCK_COMPUTE_SHADER_PROGRAM,
@@ -577,7 +580,6 @@ GFDGetComputeShader(GX2ComputeShader *shader,
577580
index,
578581
file);
579582
}
580-
*/
581583

582584
uint32_t
583585
GFDGetGeometryShaderCount(const void *file)

libraries/libwhb/include/whb/gfx.h

100644 → 100755
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ WHBGfxLoadGFDVertexShader(uint32_t index,
6767
BOOL
6868
WHBGfxFreeVertexShader(GX2VertexShader *shader);
6969

70+
GX2ComputeShader *
71+
WHBGfxLoadGFDComputeShader(uint32_t index,
72+
const void *file);
73+
74+
BOOL
75+
WHBGfxFreeComputeShader(GX2ComputeShader *shader);
76+
7077
BOOL
7178
WHBGfxLoadGFDShaderGroup(WHBGfxShaderGroup *group,
7279
uint32_t index,

libraries/libwhb/src/gfx_shader.c

100644 → 100755
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,98 @@ WHBGfxFreeVertexShader(GX2VertexShader *shader)
192192
return TRUE;
193193
}
194194

195+
GX2ComputeShader *
196+
WHBGfxLoadGFDComputeShader(uint32_t index,
197+
const void *file)
198+
{
199+
uint32_t headerSize, programSize;
200+
GX2ComputeShader *shader = NULL;
201+
void *program = NULL;
202+
203+
if (index >= GFDGetComputeShaderCount(file)) {
204+
WHBLogPrintf("%s: index %u >= %u GFDGetComputeShaderCount(file)",
205+
__FUNCTION__,
206+
index,
207+
GFDGetComputeShaderCount(file));
208+
goto error;
209+
}
210+
211+
headerSize = GFDGetComputeShaderHeaderSize(index, file);
212+
if (!headerSize) {
213+
WHBLogPrintf("%s: headerSize == 0", __FUNCTION__);
214+
goto error;
215+
}
216+
217+
programSize = GFDGetComputeShaderProgramSize(index, file);
218+
if (!programSize) {
219+
WHBLogPrintf("%s: programSize == 0", __FUNCTION__);
220+
goto error;
221+
}
222+
223+
shader = (GX2ComputeShader *)GfxHeapAllocMEM2(headerSize, 64);
224+
if (!shader) {
225+
WHBLogPrintf("%s: GfxHeapAllocMEM2(%u, 64) failed", __FUNCTION__,
226+
headerSize);
227+
goto error;
228+
}
229+
230+
shader->gx2rBuffer.flags = GX2R_RESOURCE_BIND_SHADER_PROGRAM |
231+
GX2R_RESOURCE_USAGE_CPU_READ |
232+
GX2R_RESOURCE_USAGE_CPU_WRITE |
233+
GX2R_RESOURCE_USAGE_GPU_READ;
234+
shader->gx2rBuffer.elemSize = programSize;
235+
shader->gx2rBuffer.elemCount = 1;
236+
shader->gx2rBuffer.buffer = NULL;
237+
if (!GX2RCreateBuffer(&shader->gx2rBuffer)) {
238+
WHBLogPrintf("%s: GX2RCreateBuffer failed with programSize = %u",
239+
__FUNCTION__, programSize);
240+
goto error;
241+
}
242+
243+
program = GX2RLockBufferEx(&shader->gx2rBuffer, 0);
244+
if (!program) {
245+
WHBLogPrintf("%s: GX2RLockBufferEx failed", __FUNCTION__);
246+
goto error;
247+
}
248+
249+
if (!GFDGetComputeShader(shader, program, index, file)) {
250+
WHBLogPrintf("%s: GFDGetComputeShader failed", __FUNCTION__);
251+
GX2RUnlockBufferEx(&shader->gx2rBuffer,
252+
GX2R_RESOURCE_DISABLE_CPU_INVALIDATE |
253+
GX2R_RESOURCE_DISABLE_GPU_INVALIDATE);
254+
goto error;
255+
}
256+
257+
GX2RUnlockBufferEx(&shader->gx2rBuffer, 0);
258+
259+
// For some reason we still need to manually invalidate the buffers,
260+
// even though GX2RUnlockBuffer SHOULD be doing that for us
261+
GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->program, shader->size);
262+
return shader;
263+
264+
error:
265+
if (shader) {
266+
if (shader->gx2rBuffer.buffer) {
267+
GX2RDestroyBufferEx(&shader->gx2rBuffer, 0);
268+
}
269+
270+
GfxHeapFreeMEM2(shader);
271+
}
272+
273+
return NULL;
274+
}
275+
276+
BOOL
277+
WHBGfxFreeComputeShader(GX2ComputeShader *shader)
278+
{
279+
if (shader->gx2rBuffer.buffer) {
280+
GX2RDestroyBufferEx(&shader->gx2rBuffer, 0);
281+
}
282+
283+
GfxHeapFreeMEM2(shader);
284+
return TRUE;
285+
}
286+
195287
BOOL
196288
WHBGfxLoadGFDShaderGroup(WHBGfxShaderGroup *group,
197289
uint32_t index,

0 commit comments

Comments
 (0)