batched-bench : pass custom set of PP, TG and PL

This commit is contained in:
Georgi Gerganov 2023-10-11 19:36:31 +03:00
parent c062ffd18c
commit daeb834da9
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
2 changed files with 40 additions and 2 deletions

View File

@ -10,13 +10,16 @@ There are 2 modes of operation:
- `prompt is shared` - there is a common prompt of size `PP` used by all batches (i.e. `N_KV = PP + B*TG`) - `prompt is shared` - there is a common prompt of size `PP` used by all batches (i.e. `N_KV = PP + B*TG`)
```bash ```bash
./batched-bench MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL] ./batched-bench MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL] <PP> <TG> <PL>
# LLaMA 7B, F16, N_KV_MAX = 16384 (8GB), prompt not shared # LLaMA 7B, F16, N_KV_MAX = 16384 (8GB), prompt not shared
./batched-bench ./models/llama-7b/ggml-model-f16.gguf 16384 0 99 ./batched-bench ./models/llama-7b/ggml-model-f16.gguf 16384 0 99
# LLaMA 7B, Q8_0, N_KV_MAX = 16384 (8GB), prompt is shared # LLaMA 7B, Q8_0, N_KV_MAX = 16384 (8GB), prompt is shared
./batched-bench ./models/llama-7b/ggml-model-q8_0.gguf 16384 1 99 ./batched-bench ./models/llama-7b/ggml-model-q8_0.gguf 16384 1 99
# custom set of batches
./batched-bench ./models/llama-7b/ggml-model-q8_0.gguf 2048 0 999 128,256,512 128,256 1,2,4,8,16,32
``` ```
## Sample results ## Sample results

View File

@ -7,11 +7,34 @@
#include <string> #include <string>
#include <vector> #include <vector>
// mutates the input string
static std::vector<int> parse_list(char * p) {
std::vector<int> ret;
char * q = p;
while (*p) {
if (*p == ',') {
*p = '\0';
ret.push_back(std::atoi(q));
q = p + 1;
}
++p;
}
ret.push_back(std::atoi(q));
return ret;
}
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
gpt_params params; gpt_params params;
if (argc == 1 || argv[1][0] == '-') { if (argc == 1 || argv[1][0] == '-') {
printf("usage: %s MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL]\n" , argv[0]); printf("usage: %s MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL] <PP> <TG> <PL>\n" , argv[0]);
printf(" <PP>, <TG> and PL are comma-separated lists of numbers without spaces\n\n");
printf(" example: %s ggml-model-f16.gguf 2048 0 999 128,256,512 128,256 1,2,4,8,16,32\n\n", argv[0]);
return 1 ; return 1 ;
} }
@ -40,6 +63,18 @@ int main(int argc, char ** argv) {
n_gpu_layers = std::atoi(argv[4]); n_gpu_layers = std::atoi(argv[4]);
} }
if (argc >= 6) {
n_pp = parse_list(argv[5]);
}
if (argc >= 7) {
n_tg = parse_list(argv[6]);
}
if (argc >= 8) {
n_pl = parse_list(argv[7]);
}
// init LLM // init LLM
llama_backend_init(params.numa); llama_backend_init(params.numa);