mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 12:10:18 +00:00
llama : arch
This commit is contained in:
parent
7b5b594526
commit
4c5b321042
@ -1 +1,42 @@
|
|||||||
#include "llama-arch.h"
|
#include "llama-arch.h"
|
||||||
|
|
||||||
|
#include "llama-impl.h"
|
||||||
|
|
||||||
|
// Remembers the architecture whose name operator() later passes to the key name template.
LLM_KV::LLM_KV(llm_arch arch)
    : arch(arch) {
}
|
||||||
|
|
||||||
|
// Builds the string for metadata key `kv` by formatting the key's name template
// with the name of the architecture stored in this object.
//
// Both lookups go through .at(): an entry missing from either table is reported
// by the container instead of silently yielding a default value.
std::string LLM_KV::operator()(llm_kv kv) const {
    const char * key_template = LLM_KV_NAMES.at(kv);
    const char * arch_name    = LLM_ARCH_NAMES.at(arch);

    return ::format(key_template, arch_name);
}
|
||||||
|
|
||||||
|
std::string LLM_TN_IMPL::str() const {
|
||||||
|
if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
|
||||||
|
return "__missing__";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
|
||||||
|
|
||||||
|
if (suffix != nullptr) {
|
||||||
|
name += ".";
|
||||||
|
name += suffix;
|
||||||
|
}
|
||||||
|
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the name registered for `arch` in LLM_ARCH_NAMES,
// or the literal "unknown" when the table has no entry for it.
const char * llm_arch_name(llm_arch arch) {
    const auto entry = LLM_ARCH_NAMES.find(arch);

    if (entry != LLM_ARCH_NAMES.end()) {
        return entry->second;
    }

    return "unknown";
}
|
||||||
|
|
||||||
|
// Reverse lookup: scans LLM_ARCH_NAMES for an entry whose name equals `name`
// and returns its architecture id. Returns LLM_ARCH_UNKNOWN when nothing matches.
llm_arch llm_arch_from_string(const std::string & name) {
    for (auto entry = LLM_ARCH_NAMES.begin(); entry != LLM_ARCH_NAMES.end(); ++entry) {
        if (name == entry->second) {
            return entry->first;
        }
    }

    return LLM_ARCH_UNKNOWN;
}
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "llama-impl.h"
|
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
//
|
//
|
||||||
@ -375,13 +373,11 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct LLM_KV {
|
struct LLM_KV {
|
||||||
LLM_KV(llm_arch arch) : arch(arch) {}
|
LLM_KV(llm_arch arch);
|
||||||
|
|
||||||
llm_arch arch;
|
llm_arch arch;
|
||||||
|
|
||||||
std::string operator()(llm_kv kv) const {
|
std::string operator()(llm_kv kv) const;
|
||||||
return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
enum llm_tensor {
|
enum llm_tensor {
|
||||||
@ -1589,16 +1585,6 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
|
|||||||
{ "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
|
{ "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
|
||||||
};
|
};
|
||||||
|
|
||||||
static llm_arch llm_arch_from_string(const std::string & name) {
|
|
||||||
for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
|
|
||||||
if (kv.second == name) {
|
|
||||||
return kv.first;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return LLM_ARCH_UNKNOWN;
|
|
||||||
}
|
|
||||||
|
|
||||||
// helper to handle gguf constants
|
// helper to handle gguf constants
|
||||||
// usage:
|
// usage:
|
||||||
//
|
//
|
||||||
@ -1615,20 +1601,7 @@ struct LLM_TN_IMPL {
|
|||||||
const int bid;
|
const int bid;
|
||||||
const int xid;
|
const int xid;
|
||||||
|
|
||||||
std::string str() const {
|
std::string str() const;
|
||||||
if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
|
|
||||||
return "__missing__";
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
|
|
||||||
|
|
||||||
if (suffix != nullptr) {
|
|
||||||
name += ".";
|
|
||||||
name += suffix;
|
|
||||||
}
|
|
||||||
|
|
||||||
return name;
|
|
||||||
}
|
|
||||||
|
|
||||||
operator std::string() const {
|
operator std::string() const {
|
||||||
return str();
|
return str();
|
||||||
@ -1657,58 +1630,6 @@ struct LLM_TN {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
//
|
const char * llm_arch_name(llm_arch arch);
|
||||||
// load LLaMA models
|
|
||||||
//
|
|
||||||
|
|
||||||
static const char * llama_model_arch_name(llm_arch arch) {
|
|
||||||
auto it = LLM_ARCH_NAMES.find(arch);
|
|
||||||
if (it == LLM_ARCH_NAMES.end()) {
|
|
||||||
return "unknown";
|
|
||||||
}
|
|
||||||
return it->second;
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string llama_model_ftype_name(llama_ftype ftype) {
|
|
||||||
if (ftype & LLAMA_FTYPE_GUESSED) {
|
|
||||||
return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (ftype) {
|
|
||||||
case LLAMA_FTYPE_ALL_F32: return "all F32";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_F16: return "F16";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_BF16: return "BF16";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q5_0: return "Q5_0";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q5_1: return "Q5_1";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q8_0: return "Q8_0";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q2_K: return "Q2_K - Medium";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q2_K_S: return "Q2_K - Small";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q3_K_S: return "Q3_K - Small";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q3_K_M: return "Q3_K - Medium";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q3_K_L: return "Q3_K - Large";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q4_K_S: return "Q4_K - Small";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q4_K_M: return "Q4_K - Medium";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q5_K_S: return "Q5_K - Small";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q5_K_M: return "Q5_K - Medium";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_Q6_K: return "Q6_K";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_TQ1_0: return "TQ1_0 - 1.69 bpw ternary";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_TQ2_0: return "TQ2_0 - 2.06 bpw ternary";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ2_XXS: return "IQ2_XXS - 2.0625 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ2_XS: return "IQ2_XS - 2.3125 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ2_S: return "IQ2_S - 2.5 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ2_M: return "IQ2_M - 2.7 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ3_XXS: return "IQ3_XXS - 3.0625 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ1_S: return "IQ1_S - 1.5625 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ1_M: return "IQ1_M - 1.75 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";
|
|
||||||
case LLAMA_FTYPE_MOSTLY_IQ3_M: return "IQ3_S mix - 3.66 bpw";
|
|
||||||
|
|
||||||
default: return "unknown, may not work";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
llm_arch llm_arch_from_string(const std::string & name);
|
||||||
|
@ -24,22 +24,8 @@ LLAMA_ATTRIBUTE_FORMAT(2, 3)
|
|||||||
void llama_log_internal (ggml_log_level level, const char * format, ...);
|
void llama_log_internal (ggml_log_level level, const char * format, ...);
|
||||||
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
|
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
|
||||||
|
|
||||||
// TODO: move to source
|
|
||||||
LLAMA_ATTRIBUTE_FORMAT(1, 2)
|
LLAMA_ATTRIBUTE_FORMAT(1, 2)
|
||||||
static std::string format(const char * fmt, ...) {
|
std::string format(const char * fmt, ...);
|
||||||
va_list ap;
|
|
||||||
va_list ap2;
|
|
||||||
va_start(ap, fmt);
|
|
||||||
va_copy(ap2, ap);
|
|
||||||
int size = vsnprintf(NULL, 0, fmt, ap);
|
|
||||||
GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
|
|
||||||
std::vector<char> buf(size + 1);
|
|
||||||
int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
|
|
||||||
GGML_ASSERT(size2 == size);
|
|
||||||
va_end(ap2);
|
|
||||||
va_end(ap);
|
|
||||||
return std::string(buf.data(), size);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
|
#define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
|
||||||
#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
|
#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
|
||||||
|
@ -1 +1,3 @@
|
|||||||
#include "llama-mmap.h"
|
#include "llama-mmap.h"
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,8 +4,6 @@
|
|||||||
|
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
#ifdef __has_include
|
#ifdef __has_include
|
||||||
#if __has_include(<unistd.h>)
|
#if __has_include(<unistd.h>)
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
@ -1 +1,44 @@
|
|||||||
#include "llama-model.h"
|
#include "llama-model.h"
|
||||||
|
|
||||||
|
// Returns a human-readable label for a model file type.
//
// If the LLAMA_FTYPE_GUESSED bit is set, the label of the type without that bit
// is returned with " (guessed)" appended. Values with no dedicated case yield
// "unknown, may not work".
std::string llama_model_ftype_name(llama_ftype ftype) {
    if (ftype & LLAMA_FTYPE_GUESSED) {
        // describe the underlying type first, then tag the result
        const auto base_ftype = (enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED);
        return llama_model_ftype_name(base_ftype) + " (guessed)";
    }

    // fallback used when no case below matches
    const char * label = "unknown, may not work";

    switch (ftype) {
        case LLAMA_FTYPE_ALL_F32:        label = "all F32";                  break;
        case LLAMA_FTYPE_MOSTLY_F16:     label = "F16";                      break;
        case LLAMA_FTYPE_MOSTLY_BF16:    label = "BF16";                     break;
        case LLAMA_FTYPE_MOSTLY_Q4_0:    label = "Q4_0";                     break;
        case LLAMA_FTYPE_MOSTLY_Q4_1:    label = "Q4_1";                     break;
        case LLAMA_FTYPE_MOSTLY_Q5_0:    label = "Q5_0";                     break;
        case LLAMA_FTYPE_MOSTLY_Q5_1:    label = "Q5_1";                     break;
        case LLAMA_FTYPE_MOSTLY_Q8_0:    label = "Q8_0";                     break;
        case LLAMA_FTYPE_MOSTLY_Q2_K:    label = "Q2_K - Medium";            break;
        case LLAMA_FTYPE_MOSTLY_Q2_K_S:  label = "Q2_K - Small";             break;
        case LLAMA_FTYPE_MOSTLY_Q3_K_S:  label = "Q3_K - Small";             break;
        case LLAMA_FTYPE_MOSTLY_Q3_K_M:  label = "Q3_K - Medium";            break;
        case LLAMA_FTYPE_MOSTLY_Q3_K_L:  label = "Q3_K - Large";             break;
        case LLAMA_FTYPE_MOSTLY_Q4_K_S:  label = "Q4_K - Small";             break;
        case LLAMA_FTYPE_MOSTLY_Q4_K_M:  label = "Q4_K - Medium";            break;
        case LLAMA_FTYPE_MOSTLY_Q5_K_S:  label = "Q5_K - Small";             break;
        case LLAMA_FTYPE_MOSTLY_Q5_K_M:  label = "Q5_K - Medium";            break;
        case LLAMA_FTYPE_MOSTLY_Q6_K:    label = "Q6_K";                     break;
        case LLAMA_FTYPE_MOSTLY_TQ1_0:   label = "TQ1_0 - 1.69 bpw ternary"; break;
        case LLAMA_FTYPE_MOSTLY_TQ2_0:   label = "TQ2_0 - 2.06 bpw ternary"; break;
        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: label = "IQ2_XXS - 2.0625 bpw";     break;
        case LLAMA_FTYPE_MOSTLY_IQ2_XS:  label = "IQ2_XS - 2.3125 bpw";      break;
        case LLAMA_FTYPE_MOSTLY_IQ2_S:   label = "IQ2_S - 2.5 bpw";          break;
        case LLAMA_FTYPE_MOSTLY_IQ2_M:   label = "IQ2_M - 2.7 bpw";          break;
        case LLAMA_FTYPE_MOSTLY_IQ3_XS:  label = "IQ3_XS - 3.3 bpw";         break;
        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: label = "IQ3_XXS - 3.0625 bpw";     break;
        case LLAMA_FTYPE_MOSTLY_IQ1_S:   label = "IQ1_S - 1.5625 bpw";       break;
        case LLAMA_FTYPE_MOSTLY_IQ1_M:   label = "IQ1_M - 1.75 bpw";         break;
        case LLAMA_FTYPE_MOSTLY_IQ4_NL:  label = "IQ4_NL - 4.5 bpw";         break;
        case LLAMA_FTYPE_MOSTLY_IQ4_XS:  label = "IQ4_XS - 4.25 bpw";        break;
        case LLAMA_FTYPE_MOSTLY_IQ3_S:   label = "IQ3_S - 3.4375 bpw";       break;
        case LLAMA_FTYPE_MOSTLY_IQ3_M:   label = "IQ3_S mix - 3.66 bpw";     break;

        default: break;
    }

    return label;
}
|
||||||
|
@ -648,3 +648,5 @@ static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & b
|
|||||||
throw std::runtime_error(format("no suitable buffer type found"));
|
throw std::runtime_error(format("no suitable buffer type found"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::string llama_model_ftype_name(llama_ftype ftype);
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "llama-impl.h"
|
#include "llama.h"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -59,6 +59,21 @@
|
|||||||
// helpers
|
// helpers
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// printf-style formatting that returns the result as a std::string.
//
// The variadic arguments are consumed twice - once to measure the output and once
// to render it - so a copy of the argument list is taken before the first pass.
std::string format(const char * fmt, ...) {
    va_list args_measure;
    va_list args_render;
    va_start(args_measure, fmt);
    va_copy(args_render, args_measure);

    // pass 1: with a NULL buffer of size 0, vsnprintf only reports the required length
    const int len = vsnprintf(NULL, 0, fmt, args_measure);
    GGML_ASSERT(len >= 0 && len < INT_MAX); // NOLINT

    // pass 2: render into a buffer that has room for the terminating NUL
    std::vector<char> out(len + 1);
    const int written = vsnprintf(out.data(), len + 1, fmt, args_render);
    GGML_ASSERT(written == len);

    va_end(args_render);
    va_end(args_measure);

    // the terminator is not part of the returned string
    return std::string(out.data(), len);
}
|
||||||
|
|
||||||
// trim whitespace from the beginning and end of a string
|
// trim whitespace from the beginning and end of a string
|
||||||
static std::string trim(const std::string & str) {
|
static std::string trim(const std::string & str) {
|
||||||
size_t start = 0;
|
size_t start = 0;
|
||||||
@ -16673,9 +16688,9 @@ int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int3
|
|||||||
|
|
||||||
int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
|
int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
|
||||||
return snprintf(buf, buf_size, "%s %s %s",
|
return snprintf(buf, buf_size, "%s %s %s",
|
||||||
llama_model_arch_name(model->arch),
|
llm_arch_name(model->arch), // TODO: llama_model_arch_name(model)
|
||||||
llama_model_type_name(model->type),
|
llama_model_type_name(model->type), // TODO: llama_model_type_name(model)
|
||||||
llama_model_ftype_name(model->ftype).c_str());
|
llama_model_ftype_name(model->ftype).c_str()); // TODO: llama_model_ftype_name(model)
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t llama_model_size(const struct llama_model * model) {
|
uint64_t llama_model_size(const struct llama_model * model) {
|
||||||
|
Loading…
Reference in New Issue
Block a user