.. |
fattn-vec-f16-instance-hs64-f16-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs64-f16-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs64-f16-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs64-f16-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs64-f16-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs64-f16-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-f16-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-f16-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-f16-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-f16-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-f16-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-f16-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_0-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_1-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_0-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_1-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q8_0-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f16-instance-hs256-f16-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs64-f16-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs64-f16-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs64-f16-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs64-f16-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs64-f16-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs64-f16-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-f16-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-f16-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-f16-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-f16-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-f16-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-f16-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_0-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_1-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_0-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_1-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q8_0-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-vec-f32-instance-hs256-f16-f16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-wmma-f16-instance-kqfloat-cpb16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-wmma-f16-instance-kqfloat-cpb32.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-wmma-f16-instance-kqhalf-cpb8.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-wmma-f16-instance-kqhalf-cpb16.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
fattn-wmma-f16-instance-kqhalf-cpb32.cu
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |
generate_cu_files.py
|
CUDA: quantized KV support for FA vec (#7527)
|
2024-06-01 08:44:14 +02:00 |