diff --git a/kompute/common.comp b/kompute/common.comp index 12fc7d8b5..2e843a878 100644 --- a/kompute/common.comp +++ b/kompute/common.comp @@ -12,6 +12,8 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int8: require #extension GL_EXT_shader_explicit_arithmetic_types_int16: require #extension GL_EXT_control_flow_attributes: enable +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_EXT_debug_printf : enable #define QK4_0 32 #define QR4_0 2 diff --git a/kompute/op_mul_mv_q_n.comp b/kompute/op_mul_mv_q_n.comp index 83de952dd..15bcbf765 100644 --- a/kompute/op_mul_mv_q_n.comp +++ b/kompute/op_mul_mv_q_n.comp @@ -6,9 +6,6 @@ * this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc. */ -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_EXT_debug_printf : enable - void main() { const uint nb = uint(pcs.ne00/BLOCKS_IN_QUANT); const uint r0 = gl_WorkGroupID.x; @@ -27,9 +24,9 @@ void main() { uint yb = y + ix * BLOCKS_IN_QUANT + il; - debugPrintfEXT("gl_NumSubgroups=%d, gl_SubgroupID=%d, gl_SubgroupInvocationID=%d, glSubgroupSize=%d, gl_WorkGroupSize.x=%d, gl_WorkGroupSize.y=%d, gl_WorkGroupSize.z=%d\n", - gl_NumSubgroups, gl_SubgroupID, gl_SubgroupInvocationID, gl_SubgroupSize, - gl_WorkGroupSize.x, gl_WorkGroupSize.y, gl_WorkGroupSize.z); + //debugPrintfEXT("gl_NumSubgroups=%d, gl_SubgroupID=%d, gl_SubgroupInvocationID=%d, glSubgroupSize=%d, gl_WorkGroupSize.x=%d, gl_WorkGroupSize.y=%d, gl_WorkGroupSize.z=%d\n", + // gl_NumSubgroups, gl_SubgroupID, gl_SubgroupInvocationID, gl_SubgroupSize, + // gl_WorkGroupSize.x, gl_WorkGroupSize.y, gl_WorkGroupSize.z); for (uint ib = ix; ib < nb; ib += gl_SubgroupSize/2) { for (int row = 0; row < N_ROWS; row++) { diff --git a/kompute/op_softmax.comp b/kompute/op_softmax.comp index 60456a3bb..d21577ac0 100644 --- a/kompute/op_softmax.comp +++ b/kompute/op_softmax.comp @@ -10,8 +10,6 @@ #include "common.comp" -#extension GL_KHR_shader_subgroup_arithmetic : require - layout(local_size_x_id = 0) in; layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; };