quantize : improve type name parsing (#9570)
Some checks are pending
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run

quantize : do not ignore invalid types in arg parsing

quantize : ignore case of type and ftype arguments
This commit is contained in:
slaren 2024-09-20 20:55:36 +02:00 committed by GitHub
parent d13edb17ed
commit 63351143b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -63,6 +63,16 @@ static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET = "quantize.imatrix
static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.entries_count"; static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.entries_count";
static const char * const LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS = "quantize.imatrix.chunks_count"; static const char * const LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS = "quantize.imatrix.chunks_count";
static bool striequals(const char * a, const char * b) {
while (*a && *b) {
if (std::tolower(*a) != std::tolower(*b)) {
return false;
}
a++; b++;
}
return *a == *b;
}
static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) { static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) {
std::string ftype_str; std::string ftype_str;
@ -70,7 +80,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
ftype_str.push_back(std::toupper(ch)); ftype_str.push_back(std::toupper(ch));
} }
for (auto & it : QUANT_OPTIONS) { for (auto & it : QUANT_OPTIONS) {
if (it.name == ftype_str) { if (striequals(it.name.c_str(), ftype_str.c_str())) {
ftype = it.ftype; ftype = it.ftype;
ftype_str_out = it.name; ftype_str_out = it.name;
return true; return true;
@ -225,15 +235,15 @@ static int prepare_imatrix(const std::string & imatrix_file,
} }
static ggml_type parse_ggml_type(const char * arg) { static ggml_type parse_ggml_type(const char * arg) {
ggml_type result = GGML_TYPE_COUNT; for (int i = 0; i < GGML_TYPE_COUNT; ++i) {
for (int j = 0; j < GGML_TYPE_COUNT; ++j) { auto type = (ggml_type)i;
auto type = ggml_type(j);
const auto * name = ggml_type_name(type); const auto * name = ggml_type_name(type);
if (name && strcmp(arg, name) == 0) { if (name && striequals(name, arg)) {
result = type; break; return type;
} }
} }
return result; fprintf(stderr, "%s: invalid ggml_type '%s'\n", __func__, arg);
return GGML_TYPE_COUNT;
} }
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
@ -254,12 +264,18 @@ int main(int argc, char ** argv) {
} else if (strcmp(argv[arg_idx], "--output-tensor-type") == 0) { } else if (strcmp(argv[arg_idx], "--output-tensor-type") == 0) {
if (arg_idx < argc-1) { if (arg_idx < argc-1) {
params.output_tensor_type = parse_ggml_type(argv[++arg_idx]); params.output_tensor_type = parse_ggml_type(argv[++arg_idx]);
if (params.output_tensor_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else { } else {
usage(argv[0]); usage(argv[0]);
} }
} else if (strcmp(argv[arg_idx], "--token-embedding-type") == 0) { } else if (strcmp(argv[arg_idx], "--token-embedding-type") == 0) {
if (arg_idx < argc-1) { if (arg_idx < argc-1) {
params.token_embedding_type = parse_ggml_type(argv[++arg_idx]); params.token_embedding_type = parse_ggml_type(argv[++arg_idx]);
if (params.token_embedding_type == GGML_TYPE_COUNT) {
usage(argv[0]);
}
} else { } else {
usage(argv[0]); usage(argv[0]);
} }