Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2024-12-27 03:44:35 +00:00.
7d1a378b8f
* CUDA: refactor mmq, dmmv, mmvq * fix out-of-bounds write * struct for qk, qr, qi * fix cmake build * mmq_type_traits
11 lines
361 B
Plaintext
11 lines
361 B
Plaintext
// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../fattn-wmma-f16.cuh"

// Explicit instantiations of the FlashAttention WMMA F16 kernel cases.
// Based on the header/file name, the arguments appear to be
// (head size, cols per block, accumulator type) — here one instantiation per
// head size {64, 80, 96, 112, 128, 256} with 16 columns per block and
// half-precision (FP16) accumulation.
// NOTE(review): argument semantics inferred from naming — confirm against the
// DECL_FATTN_WMMA_F16_CASE definition in fattn-wmma-f16.cuh.
DECL_FATTN_WMMA_F16_CASE(64, 16, half);
DECL_FATTN_WMMA_F16_CASE(80, 16, half);
DECL_FATTN_WMMA_F16_CASE(96, 16, half);
DECL_FATTN_WMMA_F16_CASE(112, 16, half);
DECL_FATTN_WMMA_F16_CASE(128, 16, half);
DECL_FATTN_WMMA_F16_CASE(256, 16, half);