Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-02 14:54:35 +00:00)
#include "common.cuh"

// Entry point for the CUDA FlashAttention kernels: computes
// KQV = softmax(Q*K^T scaled + mask) * V for the given input tensors,
// dispatching to an appropriate kernel variant for the device.
void ggml_cuda_flash_attn_ext(
    ggml_backend_cuda_context & ctx,
    const ggml_tensor * Q, const ggml_tensor * K, const ggml_tensor * V,
    const ggml_tensor * mask, ggml_tensor * KQV);
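
// A minimal usage sketch (hypothetical, not part of the upstream header):
// a call site forwarding pre-built ggml tensors to the dispatcher declared
// above. It assumes the tensors are device-resident and laid out as the
// CUDA backend expects, with mask allowed to be NULL when no attention
// mask is applied; the exact preconditions live in the implementation.
static inline void example_flash_attn_call(
    ggml_backend_cuda_context & ctx,
    const ggml_tensor * Q, const ggml_tensor * K, const ggml_tensor * V,
    const ggml_tensor * mask, ggml_tensor * KQV) {
    // KQV receives the attention output for Q, K, V (and mask, if given).
    ggml_cuda_flash_attn_ext(ctx, Q, K, V, mask, KQV);
}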