#include "common.cuh"

// Dispatches the FlashAttention CUDA kernels: computes the attention
// output KQV from the query, key, and value tensors Q, K, V and an
// optional attention mask.
void ggml_cuda_flash_attn_ext(
    ggml_backend_cuda_context & ctx,
    const ggml_tensor * Q, const ggml_tensor * K, const ggml_tensor * V,
    const ggml_tensor * mask, ggml_tensor * KQV);
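
// ---------------------------------------------------------------------------
// A minimal CPU reference sketch (not part of the original header): it spells
// out, for a single attention head in float32, the math that
// ggml_cuda_flash_attn_ext dispatches to the GPU, i.e.
//     KQV = softmax(scale * Q * K^T + mask) * V
// The function name, row-major layout, and explicit `scale` parameter here
// are illustrative assumptions, not the ggml API; details such as GQA
// broadcasting and non-f32 KV types are ignored.
#include <cmath>
#include <vector>

// Q: [n_q x d], K: [n_kv x d], V: [n_kv x d], mask: [n_q x n_kv] or nullptr.
// Returns KQV: [n_q x d].
static std::vector<float> attention_ref(
        const std::vector<float> & Q, const std::vector<float> & K,
        const std::vector<float> & V, const float * mask,
        int n_q, int n_kv, int d, float scale) {
    std::vector<float> KQV(n_q * d, 0.0f);
    std::vector<float> s(n_kv);
    for (int i = 0; i < n_q; ++i) {
        // scores for query row i: s[j] = scale * dot(Q_i, K_j) + mask[i][j]
        float s_max = -INFINITY;
        for (int j = 0; j < n_kv; ++j) {
            float dot = 0.0f;
            for (int k = 0; k < d; ++k) {
                dot += Q[i*d + k] * K[j*d + k];
            }
            s[j] = scale*dot + (mask ? mask[i*n_kv + j] : 0.0f);
            s_max = std::fmax(s_max, s[j]);
        }
        // numerically stable softmax over the key dimension
        float sum = 0.0f;
        for (int j = 0; j < n_kv; ++j) {
            s[j] = std::exp(s[j] - s_max);
            sum += s[j];
        }
        // KQV row i is the softmax-weighted sum of the rows of V
        for (int j = 0; j < n_kv; ++j) {
            const float w = s[j] / sum;
            for (int k = 0; k < d; ++k) {
                KQV[i*d + k] += w * V[j*d + k];
            }
        }
    }
    return KQV;
}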