cuda : disable BF16 FA

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-11-08 10:27:43 +02:00
parent 5d1a10d275
commit bc143ecf81
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -3159,6 +3159,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
#ifndef FLASH_ATTN_AVAILABLE
return false;
#endif
if (op->src[1]->type == GGML_TYPE_BF16 || op->src[2]->type == GGML_TYPE_BF16) {
return false;
}
if (op->src[0]->ne[0] == 64 && op->src[1]->type == GGML_TYPE_F16) {
return true;
}