We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent de8f01c, commit 46dba9f (Copy full SHA for 46dba9f)
1 file changed
ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp
@@ -245,7 +245,7 @@ void main() {
245
#endif
246
}
247
[[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) {
248
- Sf[r][c] += ACC_TYPE(dot(Q_cache[r], K_Tf));
+ Sf[r][c] += dot(ACC_TYPEV4(Q_cache[r]), ACC_TYPEV4(K_Tf));
249
250
251
@@ -270,7 +270,7 @@ void main() {
270
271
272
273
- Sf[r][c] += ACC_TYPE(dot(Qf[tile_row(r) * qf_stride + d * D_split + d_tid], K_Tf));
+ Sf[r][c] += dot(ACC_TYPEV4(Qf[tile_row(r) * qf_stride + d * D_split + d_tid]), ACC_TYPEV4(K_Tf));
274
275
276
0 commit comments