metal : restore original F16 mat-vec multiplication

The original dispatch works again after the norm fixes.
This commit is contained in:
Georgi Gerganov 2023-09-07 15:20:07 +03:00
parent ed92c3d4b2
commit 783379670a
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@ -995,12 +995,8 @@ void ggml_metal_graph_compute(
else if (src0t == GGML_TYPE_Q6_K) { else if (src0t == GGML_TYPE_Q6_K) {
[encoder dispatchThreadgroups:MTLSizeMake((ne01 + 1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)]; [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
} else { } else {
[encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0]; int64_t ny = (ne11 + 3)/4;
[encoder dispatchThreadgroups:MTLSizeMake(ne01, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)]; [encoder dispatchThreadgroups:MTLSizeMake(ne01, ny, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
// TODO: this breaks for Q4_0 - understand why and fix it
//int64_t ny = (ne11 + 3)/4;
//[encoder dispatchThreadgroups:MTLSizeMake(ne01, ny, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
} }
} }
} break; } break;